Remove TH/THC Storage (#68556)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/67852

cc ezyang bhosmer smessmer ljk53 bdhirsh

Pull Request resolved: https://github.com/pytorch/pytorch/pull/68556

Reviewed By: ejguan

Differential Revision: D32652758

Pulled By: ngimel

fbshipit-source-id: 170956fca112606f9008abe09b92c6ddc411be09
This commit is contained in:
Kurt Mohler
2021-11-29 12:49:57 -08:00
committed by Facebook GitHub Bot
parent f5fa91ba2e
commit d9e7d85390
52 changed files with 254 additions and 702 deletions

View File

@ -349,7 +349,6 @@ filegroup(
name = "th_srcs",
srcs = [
"aten/src/TH/THGeneral.cpp",
"aten/src/TH/THStorageFunctions.cpp",
"aten/src/TH/THTensor.cpp",
],
)
@ -385,8 +384,6 @@ filegroup(
"aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp",
"aten/src/ATen/native/sparse/cuda/SparseBlas.cpp",
"aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp",
"aten/src/THC/THCStorage.cpp",
"aten/src/THC/THCTensor.cpp",
],
)
@ -396,7 +393,6 @@ filegroup(
"aten/src/THC/THCReduceApplyUtils.cu.cc",
"aten/src/THC/THCSleep.cu.cc",
"aten/src/THC/THCSortUtils.cu.cc",
"aten/src/THC/THCStorage.cu.cc",
"aten/src/THC/THCTensor.cu.cc",
"aten/src/THC/THCTensorCopy.cu.cc",
"aten/src/THC/THCTensorMathScan.cu.cc",

View File

@ -10,7 +10,7 @@ class DataPtr;
namespace hip {
namespace HIPCachingAllocatorMasqueradingAsCUDA {
Allocator* get();
C10_HIP_API Allocator* get();
C10_HIP_API void recordStreamMasqueradingAsCUDA(const DataPtr& ptr, HIPStreamMasqueradingAsCUDA stream);
} // namespace HIPCachingAllocatorMasqueradingAsCUDA

View File

@ -17,7 +17,7 @@ multiple variants of the library, summarized here:
PyTorch employs reference counting in order to permit tensors to provide
differing views on a common underlying storage. For example, when you call
view() on a Tensor, a new THTensor is allocated with differing dimensions,
but it shares the same THStorage with the original tensor.
but it shares the same c10::StorageImpl with the original tensor.
Unfortunately, this means we are in the business of manually tracking reference
counts inside our C library code. Fortunately, for most of our library code implementing
@ -63,9 +63,9 @@ of freeing it. If that function holds on to a pointer to the object, it
will `retain` it itself.
```
THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel);
THByteStorage *inferred_size = THByteStorage_newInferSize(size, numel);
THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL);
THLongStorage_free(inferred_size);
c10::raw::intrusive_ptr::decref(inferred_size);
```
Sometimes, you have a tensor in hand which you'd like to use directly, but

View File

@ -1,11 +1,10 @@
set(Aten_TH_AVX_extra_src)
set(hdr
THGeneral.h THHalf.h THStorage.h THStorageFunctions.h THTensor.h)
THGeneral.h THHalf.h THTensor.h)
set(ATen_TH_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/THGeneral.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensor.cpp
)
# Remember that PARENT_SCOPE variables are not in the current scope
@ -35,17 +34,12 @@ install(FILES
TH.h
${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h
THGenerateByteType.h
THStorage.h
THStorageFunctions.h
THTensor.h
THHalf.h
THTensor.hpp
THStorageFunctions.hpp
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/TH")
install(FILES
generic/THStorage.cpp
generic/THStorage.h
generic/THTensor.cpp
generic/THTensor.h
# See Note [TH abstraction violation]

View File

@ -2,8 +2,6 @@
#define TH_INC
#include <TH/THGeneral.h>
#include <TH/THStorageFunctions.h>
#include <TH/THTensor.h>
#endif

View File

@ -1,4 +0,0 @@
#pragma once
#include <TH/THStorageFunctions.h>
// Compatibility header. Use THStorageFunctions.h instead if you need this.

View File

@ -1,39 +0,0 @@
#include <climits>
#include <c10/util/intrusive_ptr.h>
#include <TH/THStorageFunctions.hpp>
// NOLINTNEXTLINE(bugprone-suspicious-include)
#include <TH/generic/THStorage.cpp>
#include <TH/THGenerateByteType.h>
#include <ATen/native/Resize.h>
THStorage* THStorage_new() {
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
0,
c10::GetDefaultCPUAllocator(),
true)
.release();
return storage;
}
// Free a non-weak pointer to THStorage
void THStorage_free(THStorage* storage) {
if (!storage) {
return;
}
c10::raw::intrusive_ptr::decref(storage);
}
void THStorage_retain(THStorage *storage)
{
if (storage) {
c10::raw::intrusive_ptr::incref(storage);
}
}
void THStorage_resizeBytes(THStorage* storage, ptrdiff_t size_bytes) {
at::native::resize_bytes_cpu(storage, size_bytes);
}

View File

@ -1,11 +0,0 @@
#pragma once
#include <TH/THGeneral.h>
#define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)
#include <TH/generic/THStorage.h>
#include <TH/THGenerateByteType.h>
// This exists to have a data-type independent way of freeing (necessary for THPPointer).
TH_API void THStorage_free(THStorage *storage);

View File

@ -1,38 +0,0 @@
#pragma once
// STOP!!! Thinking of including this header directly? Please
// read Note [TH abstraction violation]
#include <c10/core/Storage.h>
#include <c10/core/StorageImpl.h>
#include <TH/THStorageFunctions.h>
#include <c10/core/ScalarType.h>
#include <c10/core/ScalarTypeToTypeMeta.h>
// Note [Weak references for intrusive refcounting]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Here's the scheme:
//
// - refcount == number of strong references to the object
// weakcount == number of weak references to the object,
// plus one more if refcount > 0
//
// - THStorage stays live as long as there are any strong
// or weak pointers to it (weakcount > 0, since strong
// references count as a +1 to weakcount)
//
// - finalizers are called and data_ptr is deallocated when refcount == 0
//
// - Once refcount == 0, it can never again be > 0 (the transition
// from > 0 to == 0 is monotonic)
//
// - When you access THStorage via a weak pointer, you must
// atomically increment the use count, if it is greater than 0.
// If it is not, you must report that the storage is dead.
//
TH_CPP_API THStorage* THStorage_new();
TH_API void THStorage_retain(THStorage *storage);
TH_API void THStorage_resizeBytes(THStorage* storage, ptrdiff_t size_bytes);

View File

@ -16,7 +16,7 @@ void THTensor_free(THTensor *self)
c10::raw::intrusive_ptr::decref(self);
}
void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_) {
void THTensor_setStorage(THTensor *self, c10::StorageImpl *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_) {
c10::raw::intrusive_ptr::incref(storage_);
THTensor_wrap(self).set_(at::Storage(c10::intrusive_ptr<at::StorageImpl>::reclaim(storage_)), storageOffset_, size_, stride_);
}

View File

@ -1,7 +1,7 @@
#ifndef TH_TENSOR_INC
#define TH_TENSOR_INC
#include <TH/THStorageFunctions.h>
#include <TH/THGeneral.h>
#define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)

View File

@ -4,7 +4,7 @@
// read Note [TH abstraction violation]
#include <TH/THTensor.h>
#include <TH/THStorageFunctions.hpp>
#include <c10/core/StorageImpl.h>
#include <atomic>
#include <ATen/ATen.h>
@ -18,4 +18,4 @@ inline at::Tensor THTensor_wrap(THTensor* tensor) {
TH_API void THTensor_free(THTensor *self);
TH_CPP_API void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_);
TH_CPP_API void THTensor_setStorage(THTensor *self, c10::StorageImpl *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_);

View File

@ -1,129 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THStorage.cpp"
#else
#include <ATen/MapAllocator.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/irange.h>
#include <new>
THStorage* THStorage_(new)(void)
{
return THStorage_new();
}
THStorage* THStorage_(newWithSize)(ptrdiff_t size)
{
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
#ifdef THQUANTIZED
size * sizeof(quantized_t),
#else
size * sizeof(scalar_t),
#endif
c10::GetDefaultCPUAllocator(),
true)
.release();
return storage;
}
THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
at::Allocator *allocator)
{
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
#ifdef THQUANTIZED
size * sizeof(quantized_t),
#else
size * sizeof(scalar_t),
#endif
allocator,
true)
.release();
return storage;
}
THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags)
{
size_t actual_size = -1;
THStorage* storage =
c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size * sizeof(scalar_t),
at::MapAllocator::makeDataPtr(
filename, flags, size * sizeof(scalar_t), &actual_size),
/* allocator */ nullptr,
false)
.release();
if (size <= 0) {
storage->set_nbytes(actual_size);
}
return storage;
}
void THStorage_(retain)(THStorage *storage)
{
THStorage_retain(storage);
}
void THStorage_(free)(THStorage *storage)
{
THStorage_free(storage);
}
THStorage* THStorage_(newWithDataAndAllocator)(at::DataPtr&& data, ptrdiff_t size,
at::Allocator* allocator) {
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
#ifdef THQUANTIZED
size * sizeof(quantized_t),
#else
size * sizeof(scalar_t),
#endif
std::move(data),
allocator,
allocator != nullptr)
.release();
return storage;
}
void THStorage_(resizeBytes)(THStorage* storage, ptrdiff_t size_bytes) {
return THStorage_resizeBytes(storage, size_bytes);
}
void THStorage_(fill)(THStorage *storage, scalar_t value)
{
const auto type_meta = caffe2::TypeMeta::Make<scalar_t>();
const size_t numel = storage->nbytes() / type_meta.itemsize();
for (const auto i : c10::irange(numel)) {
storage->data<scalar_t>()[i] = value;
}
}
void THStorage_(set)(THStorage *self, ptrdiff_t idx, scalar_t value)
{
const auto type_meta = caffe2::TypeMeta::Make<scalar_t>();
const auto numel = static_cast<int64_t>(self->nbytes() / type_meta.itemsize());
THArgCheck((idx >= 0) && (idx < numel), 2, "out of bounds");
self->data<scalar_t>()[idx] = value;
}
scalar_t THStorage_(get)(const THStorage *self, ptrdiff_t idx)
{
const auto type_meta = caffe2::TypeMeta::Make<scalar_t>();
const auto numel = static_cast<int64_t>(self->nbytes() / type_meta.itemsize());
THArgCheck((idx >= 0) && (idx < numel), 2, "out of bounds");
return self->data<scalar_t>()[idx];
}
void THStorage_(swap)(THStorage *storage1, THStorage *storage2)
{
std::swap(*storage1, *storage2);
}
#endif

View File

@ -1,54 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THStorage.h"
#else
#include <c10/core/Allocator.h>
#include <c10/core/StorageImpl.h>
/* on pourrait avoir un liste chainee
qui initialise math, lab structures (or more).
mouais -- complique.
Pb: THMapStorage is kind of a class
THLab_()... comment je m'en sors?
en template, faudrait que je les instancie toutes!!! oh boy!
Et comment je sais que c'est pour Cuda? Le type float est le meme dans les <>
au bout du compte, ca serait sur des pointeurs float/double... etc... = facile.
primitives??
*/
// Struct definition is moved to THStorage.hpp (so this file stays C compatible)
#define THStorage at::StorageImpl
// These used to be distinct types; for some measure of backwards compatibility and documentation
// alias these to the single THStorage type.
#define THByteStorage THStorage
/* slow access -- checks everything */
TH_API void THStorage_(set)(THStorage*, ptrdiff_t, scalar_t);
TH_API scalar_t THStorage_(get)(const THStorage*, ptrdiff_t);
TH_API THStorage* THStorage_(new)(void);
TH_API THStorage* THStorage_(newWithSize)(ptrdiff_t size);
TH_API THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags);
TH_API THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
c10::Allocator* allocator);
TH_API THStorage* THStorage_(newWithDataAndAllocator)(
at::DataPtr&& data, ptrdiff_t size, at::Allocator* allocator);
/* should not differ with API */
TH_API void THStorage_(setFlag)(THStorage *storage, const char flag);
TH_API void THStorage_(clearFlag)(THStorage *storage, const char flag);
TH_API void THStorage_(retain)(THStorage *storage);
TH_API void THStorage_(swap)(THStorage *storage1, THStorage *storage2);
/* might differ with other API (like CUDA) */
TH_API void THStorage_(free)(THStorage *storage);
TH_API void THStorage_(resizeBytes)(THStorage* storage, ptrdiff_t size_bytes);
TH_API void THStorage_(fill)(THStorage *storage, scalar_t value);
#endif

View File

@ -10,7 +10,7 @@
/**** creation methods ****/
THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset,
THTensor *THTensor_(newWithStorage1d)(c10::StorageImpl *storage, ptrdiff_t storageOffset,
int64_t size0, int64_t stride0)
{
c10::raw::intrusive_ptr::incref(storage);

View File

@ -24,7 +24,7 @@
#define THComplexDoubleTensor THTensor
/**** creation methods ****/
TH_API THTensor *THTensor_(newWithStorage1d)(THStorage *storage_, ptrdiff_t storageOffset_,
TH_API THTensor *THTensor_(newWithStorage1d)(c10::StorageImpl *storage_, ptrdiff_t storageOffset_,
int64_t size0_, int64_t stride0_);
#endif

View File

@ -3,11 +3,9 @@ set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
PARENT_SCOPE)
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/THCStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THCTensor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THCSleep.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCStorage.cu
PARENT_SCOPE)
install(FILES
@ -15,19 +13,14 @@ install(FILES
THCGeneral.h
THCGeneral.hpp
THCSleep.h
THCStorage.h
THCTensor.h
THCAtomics.cuh
THCGenerateByteType.h
# See Note [TH abstraction violation]
THCTensor.hpp
THCStorage.hpp
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC")
install(FILES
generic/THCStorage.cpp
generic/THCStorage.cu
generic/THCStorage.h
generic/THCTensor.cpp
generic/THCTensor.h
DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC/generic")

View File

@ -4,8 +4,6 @@
#include <THC/THCGeneral.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <THC/THCSleep.h>
#include <THC/THCStorage.h>
#include <THC/THCTensor.h>
#endif

View File

@ -1,38 +0,0 @@
#include <THC/THCStorage.hpp>
#include <THC/THCGeneral.h>
#include <TH/THHalf.h>
#include <new>
#include <c10/cuda/CUDACachingAllocator.h>
#include <THC/generic/THCStorage.cpp>
#include <THC/THCGenerateByteType.h>
#include <c10/util/intrusive_ptr.h>
#include <ATen/native/cuda/Resize.h>
void THCStorage_resizeBytes(
THCState* state,
THCStorage* self,
ptrdiff_t size_bytes_i) {
TORCH_CHECK(!c10::overflows<size_t>(size_bytes_i),
"Requested storage size (", size_bytes_i,
") cannot be represented as a size_t");
const auto size_bytes = static_cast<size_t>(size_bytes_i);
at::native::resize_bytes_cuda(self, size_bytes);
}
int THCStorage_getDevice(THCState* state, const THCStorage* storage) {
return storage->device().index();
}
THCStorage* THCStorage_new(THCState* state) {
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
0,
c10::cuda::CUDACachingAllocator::get(),
true)
.release();
return storage;
}

View File

@ -1,11 +0,0 @@
#include <THC/THCStorage.hpp>
#include <ATen/cuda/ThrustAllocator.h>
#include <thrust/device_ptr.h>
#include <thrust/fill.h>
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 7000) || defined(USE_ROCM)
#include <thrust/system/cuda/execution_policy.h>
#endif
#include <THC/generic/THCStorage.cu>
#include <THC/THCGenerateByteType.h>

View File

@ -1,12 +0,0 @@
#ifndef THC_STORAGE_INC
#define THC_STORAGE_INC
#include <TH/THStorageFunctions.h>
#include <THC/THCGeneral.h>
#define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME)
#include <THC/generic/THCStorage.h>
#include <THC/THCGenerateByteType.h>
#endif

View File

@ -1,32 +0,0 @@
#pragma once
// STOP!!! Thinking of including this header directly? Please
// read Note [TH abstraction violation]
#include <THC/THCStorage.h>
// Should work with THStorageClass
#include <TH/THStorageFunctions.hpp>
#include <c10/core/ScalarType.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
TORCH_CUDA_CU_API THCStorage* THCStorage_new(THCState* state);
TORCH_CUDA_CU_API void THCStorage_retain(THCState* state, THCStorage* storage);
TORCH_CUDA_CU_API void THCStorage_resizeBytes(
THCState* state,
THCStorage* storage,
ptrdiff_t size_bytes);
TORCH_CUDA_CU_API int THCStorage_getDevice(
THCState* state,
const THCStorage* storage);
TORCH_CUDA_CU_API THCStorage* THCStorage_newWithDataAndAllocator(
THCState* state,
at::DataPtr&& data,
ptrdiff_t size,
at::Allocator* allocator);

View File

@ -6,7 +6,7 @@
#include <THC/generic/THCTensor.cpp>
#include <THC/THCGenerateByteType.h>
void THCTensor_setStorage(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_)
void THCTensor_setStorage(THCState *state, THCTensor *self, c10::StorageImpl *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_)
{
c10::raw::intrusive_ptr::incref(storage_);
THTensor_wrap(self).set_(at::Storage(c10::intrusive_ptr<at::StorageImpl>::reclaim(storage_)),

View File

@ -2,7 +2,6 @@
#define THC_TENSOR_INC
#include <TH/THTensor.h>
#include <THC/THCStorage.h>
#include <THC/THCGeneral.h>
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)

View File

@ -5,7 +5,6 @@
#include <THC/THCTensor.h>
#include <TH/THTensor.hpp>
#include <THC/THCStorage.hpp>
#include <THC/THCGeneral.hpp>
#include <ATen/ATen.h>
@ -13,7 +12,7 @@
TORCH_CUDA_CU_API void THCTensor_setStorage(
THCState* state,
THCTensor* self,
THCStorage* storage_,
c10::StorageImpl* storage_,
ptrdiff_t storageOffset_,
at::IntArrayRef size_,
at::IntArrayRef stride_);

View File

@ -1,86 +0,0 @@
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCStorage.cpp"
#else
#include <c10/util/intrusive_ptr.h>
#include <c10/util/typeid.h>
void THCStorage_(set)(THCState *state, THCStorage *self, ptrdiff_t index, scalar_t value)
{
THArgCheck(
(index >= 0) && (index < static_cast<int64_t>(self->nbytes() / sizeof(scalar_t))),
2,
"index out of bounds");
cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
at::cuda::memcpy_and_sync(self->data<scalar_t>() + index, &value, sizeof(scalar_t),
cudaMemcpyHostToDevice,
stream);
}
scalar_t THCStorage_(get)(THCState *state, const THCStorage *self, ptrdiff_t index)
{
THArgCheck(
(index >= 0) && (index < static_cast<int64_t>(self->nbytes() / sizeof(scalar_t))),
2,
"index out of bounds");
scalar_t value;
cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
at::cuda::memcpy_and_sync(&value, self->data<scalar_t>() + index, sizeof(scalar_t),
cudaMemcpyDeviceToHost, stream);
return value;
}
THCStorage* THCStorage_(new)(THCState *state)
{
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
0,
c10::cuda::CUDACachingAllocator::get(),
true)
.release();
return storage;
}
THCStorage* THCStorage_(newWithSize)(THCState *state, ptrdiff_t size)
{
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size * sizeof(scalar_t),
c10::cuda::CUDACachingAllocator::get(),
true)
.release();
return storage;
}
THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, ptrdiff_t size, int isShared)
{
THError("not available yet for THCStorage");
return NULL;
}
THCStorage* THCStorage_(newWithDataAndAllocator)(
THCState* state,
at::DataPtr&& data,
ptrdiff_t size,
at::Allocator* allocator) {
THStorage* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size * sizeof(scalar_t),
std::move(data),
allocator,
allocator != nullptr)
.release();
return storage;
}
void THCStorage_(retain)(THCState *state, THCStorage *self)
{
THStorage_retain(self);
}
void THCStorage_(free)(THCState *state, THCStorage *self)
{
THStorage_free(self);
}
#endif

View File

@ -1,28 +0,0 @@
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCStorage.cu"
#else
void THCStorage_(fill)(THCState *state, THCStorage *self, scalar_t value)
{
at::cuda::ThrustAllocator thrustAlloc;
thrust::device_ptr<scalar_t> self_data(self->data<scalar_t>());
thrust::fill(
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 7000) || defined(USE_ROCM)
thrust::cuda::par(thrustAlloc).on(c10::cuda::getCurrentCUDAStream()),
#endif
self_data,
self_data + (self->nbytes() / sizeof(scalar_t)),
value);
}
void THCStorage_(
resizeBytes)(THCState* state, THCStorage* self, ptrdiff_t size_bytes) {
THCStorage_resizeBytes(state, self, size_bytes);
}
int THCStorage_(getDevice)(THCState* state, const THCStorage* storage) {
return THCStorage_getDevice(state, storage);
}
#endif

View File

@ -1,49 +0,0 @@
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCStorage.h"
#else
#define THCStorage THStorage
// These used to be distinct types; for some measure of backwards compatibility and documentation
// alias these to the single THCStorage type.
#define THCudaStorage THCStorage
#define THCudaByteStorage THCStorage
/* slow access -- checks everything */
TORCH_CUDA_CU_API void THCStorage_(
set)(THCState* state, THCStorage*, ptrdiff_t, scalar_t);
TORCH_CUDA_CU_API scalar_t
THCStorage_(get)(THCState* state, const THCStorage*, ptrdiff_t);
TORCH_CUDA_CU_API THCStorage* THCStorage_(new)(THCState* state);
TORCH_CUDA_CU_API THCStorage* THCStorage_(
newWithSize)(THCState* state, ptrdiff_t size);
TORCH_CUDA_CU_API THCStorage* THCStorage_(newWithMapping)(
THCState* state,
const char* filename,
ptrdiff_t size,
int shared);
TORCH_CUDA_CU_API THCStorage* THCStorage_(newWithDataAndAllocator)(
THCState* state,
at::DataPtr&& data,
ptrdiff_t size,
at::Allocator* allocator);
TORCH_CUDA_CU_API void THCStorage_(
setFlag)(THCState* state, THCStorage* storage, const char flag);
TORCH_CUDA_CU_API void THCStorage_(
clearFlag)(THCState* state, THCStorage* storage, const char flag);
TORCH_CUDA_CU_API void THCStorage_(
retain)(THCState* state, THCStorage* storage);
TORCH_CUDA_CU_API void THCStorage_(free)(THCState* state, THCStorage* storage);
TORCH_CUDA_CU_API void THCStorage_(
resizeBytes)(THCState* state, THCStorage* storage, ptrdiff_t size_bytes);
TORCH_CUDA_CU_API void THCStorage_(
fill)(THCState* state, THCStorage* storage, scalar_t value);
TORCH_CUDA_CU_API int THCStorage_(
getDevice)(THCState* state, const THCStorage* storage);
#endif

View File

@ -7,7 +7,7 @@
/**** creation methods ****/
THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset,
THCTensor *THCTensor_(newWithStorage1d)(THCState *state, c10::StorageImpl *storage, ptrdiff_t storageOffset,
int64_t size0, int64_t stride0)
{
c10::raw::intrusive_ptr::incref(storage);

View File

@ -20,12 +20,12 @@
#define THCudaComplexDoubleTensor THCTensor
/**** access methods ****/
TORCH_CUDA_CU_API THCStorage* THCTensor_(
TORCH_CUDA_CU_API c10::StorageImpl* THCTensor_(
storage)(THCState* state, const THCTensor* self);
/**** creation methods ****/
TORCH_CUDA_CU_API THCTensor* THCTensor_(newWithStorage1d)(
THCState* state,
THCStorage* storage_,
c10::StorageImpl* storage_,
ptrdiff_t storageOffset_,
int64_t size0_,
int64_t stride0_);

View File

@ -66,7 +66,7 @@ class C10_API intrusive_ptr_target {
// plus one more if refcount > 0
// An invariant: refcount > 0 => weakcount > 0
//
// - THStorage stays live as long as there are any strong
// - c10::StorageImpl stays live as long as there are any strong
// or weak pointers to it (weakcount > 0, since strong
// references count as a +1 to weakcount)
//
@ -75,7 +75,7 @@ class C10_API intrusive_ptr_target {
// - Once refcount == 0, it can never again be > 0 (the transition
// from > 0 to == 0 is monotonic)
//
// - When you access THStorage via a weak pointer, you must
// - When you access c10::StorageImpl via a weak pointer, you must
// atomically increment the use count, if it is greater than 0.
// If it is not, you must report that the storage is dead.
//

View File

@ -1229,7 +1229,6 @@ aten_native_source_non_codegen_list = [
"aten/src/ATen/native/sparse/SparseTensorMath.cpp",
"aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp",
"aten/src/TH/THGeneral.cpp",
"aten/src/TH/THStorageFunctions.cpp",
"aten/src/TH/THTensor.cpp",
"aten/src/ATen/native/utils/Factory.cpp",
"aten/src/ATen/native/xnnpack/Activation.cpp",

View File

@ -116,7 +116,7 @@ PyObject* createPyObject(
auto type = getPyTypeObject(storage, data_type);
auto obj = THPObjectPtr(type->tp_alloc(type, 0));
if (!obj) throw python_error();
((THPVoidStorage*)obj.get())->cdata = (THVoidStorage *)at::Storage(/* copy */ storage).unsafeReleaseStorageImpl();
((THPVoidStorage*)obj.get())->cdata = at::Storage(/* copy */ storage).unsafeReleaseStorageImpl();
return obj.release();
}

View File

@ -14,7 +14,7 @@ struct THPVoidTensor {
struct THPVoidStorage {
PyObject_HEAD
THVoidStorage *cdata;
c10::StorageImpl *cdata;
};
} // namespace torch

View File

@ -30,7 +30,8 @@ One important case where this assumption is important is when tracking
the CUDA device a tensor is stored in: this information is stored
solely in the storage, so if a storage is nullptr, we lose this information.
Although storage is never nullptr, the data field of THStorage may be nullptr. This
Although storage is never nullptr, the data field of c10::StorageImpl may be
nullptr. This
mostly occurs when we want to pre-allocate an output tensor struct, but then
have it be resized and filled with data by some operator: there's no point in
allocating data for it in this case!

View File

@ -4,12 +4,9 @@
#endif
#include <structmember.h>
#define THP_HOST_HALF
#include <TH/TH.h>
// See Note [TH abstraction violation]
// - Used to get at the allocator associated with a storage
#include <TH/THStorageFunctions.hpp>
#include <libshm.h>
#include <torch/csrc/THP.h>
#include <torch/csrc/copy_utils.h>
@ -17,6 +14,7 @@
#include <torch/csrc/CudaIPCTypes.h>
#include <torch/csrc/Device.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <c10/core/CPUAllocator.h>
#include <fmt/format.h>
@ -24,9 +22,11 @@
#include <torch/csrc/generic/Storage.cpp>
#include <TH/THGenerateByteType.h>
#include <c10/util/intrusive_ptr.h>
template<>
void THPPointer<THStorage>::free() {
void THPPointer<c10::StorageImpl>::free() {
if (ptr) {
THStorage_free(ptr);
c10::raw::intrusive_ptr::decref(ptr);
}
}

View File

@ -1,5 +1,5 @@
#pragma once
struct THPStorage {
PyObject_HEAD
THWStorage *cdata;
c10::StorageImpl *cdata;
};

View File

@ -23,8 +23,6 @@
#define LIBRARY_STATE_TYPE
#define LIBRARY_STATE_TYPE_NOARGS
#define THWStorage THStorage
#define THWStorage_(NAME) THStorage_(NAME)
#define THWTensor THTensor
#define THWTensor_(NAME) THTensor_(NAME)

View File

@ -12,8 +12,6 @@ template <typename T> struct THPTypeInfo {};
namespace torch {
typedef THStorage THVoidStorage;
typedef THTensor THVoidTensor;
} // namespace torch

View File

@ -1,12 +1,9 @@
#include <torch/csrc/cuda/undef_macros.h>
#define THWStoragePtr THCStoragePtr
#define THPStoragePtr THCPStoragePtr
#define THWTensorPtr THCTensorPtr
#define THPTensorPtr THCPTensorPtr
#define THWStorage THCStorage
#define THWStorage_(NAME) THCStorage_(NAME)
#define THWTensor THCTensor
#define THWTensor_(NAME) THCTensor_(NAME)
@ -50,5 +47,3 @@
#define THHostTensor TH_CONCAT_3(TH,Real,Tensor)
#define THHostTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)
#define THHostStorage TH_CONCAT_3(TH,Real,Storage)
#define THHostStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)

View File

@ -11,7 +11,6 @@
#define THPStorageClass TH_CONCAT_3(THP,Real,StorageClass)
#define THPStorage_(NAME) TH_CONCAT_4(THP,Real,Storage_,NAME)
#define THWStoragePtr TH_CONCAT_3(TH,Real,StoragePtr)
#define THWTensorPtr TH_CONCAT_3(TH,Real,TensorPtr)
#define THPStoragePtr TH_CONCAT_3(THP,Real,StoragePtr)
#define THPTensorPtr TH_CONCAT_3(THP,Real,TensorPtr)

View File

@ -21,12 +21,9 @@
#undef THPStorageClass
#undef THPStorageType
#undef THWStorage
#undef THWStorage_
#undef THWTensor
#undef THWTensor_
#undef THWStoragePtr
#undef THPStoragePtr
#undef THWTensorPtr
#undef THPTensorPtr
@ -48,5 +45,3 @@
#undef THHostTensor
#undef THHostTensor_
#undef THHostStorage
#undef THHostStorage_

View File

@ -3,7 +3,6 @@
#define THCPUtils_(NAME) TH_CONCAT_4(THCP,Real,Utils_,NAME)
#define THCStoragePtr TH_CONCAT_3(THC,Real,StoragePtr)
#define THCTensorPtr TH_CONCAT_3(THC,Real,TensorPtr)
#define THCPStoragePtr TH_CONCAT_3(THCP,Real,StoragePtr)
#define THCPTensorPtr TH_CONCAT_3(THCP,Real,TensorPtr)

View File

@ -4,7 +4,7 @@
PyObject *THPStorageClass = nullptr;
PyObject * THPStorage_(New)(THWStorage *ptr)
PyObject * THPStorage_(New)(c10::StorageImpl *ptr)
{
AT_ASSERT(ptr);
PyTypeObject *type = (PyTypeObject *)THPStorageClass;
@ -12,25 +12,17 @@ PyObject * THPStorage_(New)(THWStorage *ptr)
if (obj) {
((THPStorage *)obj)->cdata = ptr;
} else {
THWStorage_(free)(LIBRARY_STATE ptr);
c10::raw::intrusive_ptr::decref(ptr);
}
return obj;
}
static void THPStorage_(dealloc)(THPStorage* self)
{
THWStorage_(free)(LIBRARY_STATE self->cdata);
Py_TYPE(self)->tp_free((PyObject*)self);
if (self->cdata) {
c10::raw::intrusive_ptr::decref(self->cdata);
}
static THWStorage* THPStorage_(newWithAllocator)(int64_t size, at::Allocator* allocator)
{
#if defined(THC_GENERIC_FILE)
THPUtils_setError(THPStorageStr " does not support custom allocators");
return nullptr;
#else
return THWStorage_(newWithAllocator)(LIBRARY_STATE size, allocator);
#endif
Py_TYPE(self)->tp_free((PyObject*)self);
}
static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
@ -55,22 +47,28 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
if (num_args == 0) {
PyObject *cdata_ptr = PyDict_GetItemString(kwargs, "cdata");
if (num_kwargs == 1 && cdata_ptr && THPUtils_checkLong(cdata_ptr)) {
THWStorage *ptr = (THWStorage*)PyLong_AsVoidPtr(cdata_ptr);
c10::StorageImpl *ptr = (c10::StorageImpl*)PyLong_AsVoidPtr(cdata_ptr);
self->cdata = ptr;
return (PyObject*)self.release();
}
}
THPUtils_assert(num_kwargs == 0, THPStorageStr "(): invalid keyword arguments");
}
if (allocator == nullptr) {
#if defined(THC_GENERIC_FILE)
allocator = c10::cuda::CUDACachingAllocator::get();
#else
allocator = c10::GetDefaultCPUAllocator();
#endif
}
// torch.Storage()
if (num_args == 0) {
// NOLINTNEXTLINE(bugprone-branch-clone)
if (allocator) {
self->cdata = THPStorage_(newWithAllocator)(0, allocator);
} else {
self->cdata = THWStorage_(new)(LIBRARY_STATE_NOARGS);
}
self->cdata = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
0,
allocator,
/*resizable=*/true).release();
return (PyObject*)self.release();
}
@ -79,12 +77,11 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
// torch.Storage(size)
if (num_args == 1 && THPUtils_checkLong(first_arg)) {
int64_t size = THPUtils_unpackLong(first_arg);
// NOLINTNEXTLINE(bugprone-branch-clone)
if (allocator) {
self->cdata = THPStorage_(newWithAllocator)(size, allocator);
} else {
self->cdata = THWStorage_(newWithSize)(LIBRARY_STATE size);
}
self->cdata = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
allocator,
/*resizable=*/true).release();
return (PyObject*)self.release();
}
@ -93,7 +90,12 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
Py_ssize_t length = PySequence_Length(first_arg);
THPUtils_assert(length >= 0, "couldn't obtain the length of %s",
THPUtils_typename(first_arg));
self->cdata = THWStorage_(newWithSize)(LIBRARY_STATE length);
self->cdata = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
length,
allocator,
/*resizable=*/true)
.release();
THPObjectPtr item;
try {
for (Py_ssize_t i = 0; i < length; i++) {
@ -104,7 +106,10 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
self->cdata->unsafe_data<scalar_t>()[i] = value;
#else
// TODO: this might be slow - consider batched updates?
THCStorage_(set)(LIBRARY_STATE self->cdata, i, value);
storage_set(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
i,
value);
#endif
}
} catch (const std::exception &e) {
@ -147,7 +152,7 @@ static PyObject * THPStorage_(get)(THPStorage *self, PyObject *index)
nindex, self->cdata->nbytes() / sizeof(scalar_t)));
return nullptr;
}
scalar_t value = THWStorage_(get)(LIBRARY_STATE self->cdata, nindex);
scalar_t value = storage_get(at::unsafeStorageFromTH(self->cdata, /*retain=*/true), nindex);
return THPUtils_(newReal)(value);
/* Slice index */
} else if (PySlice_Check(index)) {
@ -205,7 +210,10 @@ static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
scalar_t rvalue = THPUtils_(unpackReal)(value);
if (THPUtils_checkLong(index)) {
int64_t nindex = THPUtils_unpackLong(index);
THWStorage_(set)(LIBRARY_STATE self->cdata, nindex, rvalue);
storage_set(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
nindex,
rvalue);
return 0;
} else if (PySlice_Check(index)) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -221,7 +229,10 @@ static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
// TODO: check the bounds only once
// TODO: fill?
for (;start < stop; start++)
THWStorage_(set)(LIBRARY_STATE self->cdata, start, rvalue);
storage_set(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
start,
rvalue);
return 0;
}
THPUtils_setError("can't index a " THPStorageStr " with %s",

View File

@ -4,7 +4,7 @@
#include <torch/csrc/StorageDefs.h>
THP_API PyObject * THPStorage_(New)(THWStorage *ptr);
THP_API PyObject * THPStorage_(New)(c10::StorageImpl *ptr);
extern PyObject *THPStorageClass;
#include <torch/csrc/Types.h>

View File

@ -8,6 +8,13 @@
#include <cuda_runtime.h>
#endif
#if !defined(THC_GENERIC_FILE)
#include <c10/core/CPUAllocator.h>
#include <ATen/native/Resize.h>
#else
#include <ATen/native/cuda/Resize.h>
#endif
#ifdef _MSC_VER
#define LSEEK _lseeki64
#else
@ -79,9 +86,18 @@ static PyObject * THPStorage_(new)(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
THWStoragePtr new_storage(THWStorage_(new)(LIBRARY_STATE_NOARGS));
auto new_storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
0,
#if defined(THC_GENERIC_FILE)
c10::cuda::CUDACachingAllocator::get(),
#else
c10::GetDefaultCPUAllocator(),
#endif
/*resizable=*/true);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
PyObject *_ret = THPStorage_(New)(new_storage);
PyObject *_ret = THPStorage_(New)(new_storage.get());
new_storage.release();
return _ret;
END_HANDLE_TH_ERRORS
@ -94,8 +110,16 @@ static PyObject * THPStorage_(resize_)(PyObject *_self, PyObject *number_arg)
THPUtils_assert(THPUtils_checkLong(number_arg), "resize_ expects an int, "
"but got %s", THPUtils_typename(number_arg));
int64_t newsize = THPUtils_unpackLong(number_arg);
THWStorage_(resizeBytes)(
LIBRARY_STATE self->cdata, newsize * sizeof(scalar_t));
#if defined(THC_GENERIC_FILE)
ptrdiff_t size_bytes_i = newsize;
TORCH_CHECK(!c10::overflows<size_t>(size_bytes_i),
"Requested storage size (", size_bytes_i,
") cannot be represented as a size_t");
const auto size_bytes = static_cast<size_t>(size_bytes_i);
at::native::resize_bytes_cuda(self->cdata, size_bytes);
#else
at::native::resize_bytes_cpu(self->cdata, newsize);
#endif
Py_INCREF(self);
return (PyObject*)self;
END_HANDLE_TH_ERRORS
@ -108,7 +132,9 @@ static PyObject * THPStorage_(fill_)(PyObject *_self, PyObject *number_arg)
THPUtils_assert(THPUtils_(checkReal)(number_arg), "fill_ expects %s, "
"but got %s", THPUtils_typeTraits<scalar_t>::python_type_str,
THPUtils_typename(number_arg));
THWStorage_(fill)(LIBRARY_STATE self->cdata, THPUtils_(unpackReal)(number_arg));
storage_fill(
at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
THPUtils_(unpackReal)(number_arg));
Py_INCREF(self);
return (PyObject*)self;
END_HANDLE_TH_ERRORS
@ -197,12 +223,21 @@ static PyObject * THPStorage_(fromBuffer)(PyObject *_unused, PyObject *args, PyO
}
uint8_t* src = (uint8_t*) buffer.buf;
THWStorage* storage = THWStorage_(newWithSize)(size_bytes);
c10::StorageImpl* storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size_bytes,
#if defined(THC_GENERIC_FILE)
c10::cuda::CUDACachingAllocator::get(),
#else
c10::GetDefaultCPUAllocator(),
#endif
/*resizable=*/true)
.release();
if (scalar_type == at::kByte || scalar_type == at::kChar) {
memcpy(storage->data(), src + offset, count);
} else if (scalar_type == at::kBool) {
// Because of ASAN checks, that are failing in the THStorage.cpp whenever
// Because of ASAN checks, that are failing whenever
// we are trying to get a value which is not 0 or 1, we have to manually
// convert original values to boolean ones.
torch::utils::THP_decodeBoolBuffer(
@ -269,7 +304,27 @@ static PyObject * THPStorage_(fromFile)(PyObject *_unused, PyObject *args, PyObj
}
if (shared)
shared = at::ALLOCATOR_MAPPED_SHARED;
THWStorage *storage = THWStorage_(newWithMapping)(LIBRARY_STATE filename, nbytes, shared);
c10::StorageImpl* storage;
#ifdef THC_GENERIC_FILE
THError("not available yet for CUDA");
storage = NULL;
#else
size_t actual_nbytes = -1;
storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
nbytes,
at::MapAllocator::makeDataPtr(
filename, shared, nbytes, &actual_nbytes),
/*allocator=*/nullptr,
/*resizable=*/false)
.release();
if (nbytes <= 0) {
storage->set_nbytes(actual_nbytes);
}
#endif
return (PyObject*)THPStorage_(New)(storage);
END_HANDLE_TH_ERRORS
}
@ -314,7 +369,7 @@ PyObject * THPStorage_(newWithFile)(PyObject *_unused, PyObject *args)
"_new_with_file: need to specify element size");
uint64_t element_size = THPUtils_unpackUInt64(element_size_obj);
THWStorage *storage = THPStorage_(readFileRaw<int>)(fd, nullptr, element_size);
c10::StorageImpl *storage = THPStorage_(readFileRaw<int>)(fd, nullptr, element_size);
if (storage == nullptr)
return nullptr;
PyObject *result = THPStorage_(New)(storage);
@ -341,7 +396,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
// but it is currently unnecessary to support this.
THPUtils_assert(offset == Py_None,
"_set_from_file: offset is NYI for filelike objects");
THWStorage *storage = THPStorage_(readFileRaw<PyObject*>)(file, self->cdata, element_size);
c10::StorageImpl *storage = THPStorage_(readFileRaw<PyObject*>)(file, self->cdata, element_size);
if (storage == nullptr) {
return nullptr;
}
@ -357,7 +412,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
}
THPUtils_assert(fd != -1, "_set_from_file couldn't retrieve a file "
"descriptor from given object");
THWStorage *storage = THPStorage_(readFileRaw<int>)(fd, self->cdata, element_size);
c10::StorageImpl *storage = THPStorage_(readFileRaw<int>)(fd, self->cdata, element_size);
if (storage == nullptr)
return nullptr;
Py_INCREF(self);
@ -382,7 +437,7 @@ PyObject * THPStorage_(getDevice)(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
return THPUtils_packInt32(THCStorage_(getDevice)(LIBRARY_STATE self->cdata));
return THPUtils_packInt32(self->cdata->device().index());
END_HANDLE_TH_ERRORS
}
#endif
@ -394,9 +449,13 @@ PyObject * THPStorage_(_setCdata)(PyObject *_self, PyObject *new_cdata)
THPUtils_assert(THPUtils_checkLong(new_cdata), "given an invalid argument to "
"_set_cdata - expected an int or long, but got %s",
THPUtils_typename(new_cdata));
THWStorage *ptr = (THWStorage*)PyLong_AsVoidPtr(new_cdata);
THWStorage_(retain)(LIBRARY_STATE ptr);
THWStorage_(free)(LIBRARY_STATE self->cdata);
c10::StorageImpl *ptr = (c10::StorageImpl*)PyLong_AsVoidPtr(new_cdata);
if (ptr) {
c10::raw::intrusive_ptr::incref(ptr);
}
if (self->cdata) {
c10::raw::intrusive_ptr::decref(self->cdata);
}
self->cdata = ptr;
Py_INCREF(self);
return (PyObject*)self;

View File

@ -14,7 +14,7 @@ static PyObject * THPStorage_(sharedDecref)(PyObject *_self, PyObject *noargs)
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
#ifndef THC_GENERIC_FILE
THWStorage *storage = self->cdata;
c10::StorageImpl *storage = self->cdata;
THManagedMapAllocator *ctx = THManagedMapAllocator::fromDataPtr(storage->data_ptr());
if (ctx) {
ctx->decref();
@ -30,7 +30,7 @@ static PyObject * THPStorage_(sharedIncref)(PyObject *_self, PyObject *noargs)
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
#ifndef THC_GENERIC_FILE
THWStorage *storage = self->cdata;
c10::StorageImpl *storage = self->cdata;
THManagedMapAllocator *ctx = THManagedMapAllocator::fromDataPtr(storage->data_ptr());
if (ctx) {
ctx->incref();
@ -42,14 +42,6 @@ static PyObject * THPStorage_(sharedIncref)(PyObject *_self, PyObject *noargs)
#ifndef THC_GENERIC_FILE
static THWStorage* THPStorage_(newFilenameStorage)(ptrdiff_t size)
{
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
std::string handle = at::NewProcessWideShmHandle();
return THWStorage_(newWithDataAndAllocator)(
THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, size * sizeof(scalar_t)), size, /* allocator */ nullptr);
}
static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject *args)
{
HANDLE_TH_ERRORS
@ -58,7 +50,16 @@ static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject
if (!PyArg_ParseTuple(args, "L", &size)) {
return nullptr;
}
return THPStorage_(New)(THPStorage_(newFilenameStorage)(size));
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
std::string handle = at::NewProcessWideShmHandle();
return THPStorage_(New)(c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, size),
/*allocator=*/nullptr,
/*resizable=*/false)
.release());
END_HANDLE_TH_ERRORS
}
@ -66,7 +67,7 @@ static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
THWStorage *storage = self->cdata;
c10::StorageImpl *storage = self->cdata;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
THManagedMapAllocator *ctx;
// Storage is already in shared memory, just return a handle
@ -75,14 +76,21 @@ static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
} else {
// TODO: retry on collision
// TODO: free GIL - but remember to reacquire it when an exception is thrown
THWStoragePtr new_storage(
THPStorage_(newFilenameStorage)(storage->nbytes() / sizeof(scalar_t)));
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
std::string handle = at::NewProcessWideShmHandle();
c10::StorageImpl* new_storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
storage->nbytes(),
THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, storage->nbytes()),
/*allocator=*/nullptr,
/*resizable=*/false)
.release();
at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage.get(), /*retain=*/true);
at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage, /*retain=*/true);
at::Storage _self_aten = torch::createStorage(_self);
storage_copy(new_storage_aten, _self_aten);
THWStorage_(swap)(storage, new_storage);
std::swap(*storage, *new_storage);
ctx = THManagedMapAllocator::fromDataPtr(storage->data_ptr());
AT_ASSERT(ctx);
}
@ -121,14 +129,17 @@ static PyObject * THPStorage_(newSharedFilename)(PyObject *_unused, PyObject *ar
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM |
at::ALLOCATOR_MAPPED_NOCREATE;
return THPStorage_(New)(
THWStorage_(newWithDataAndAllocator)(
THManagedMapAllocator::makeDataPtr(manager_handle, object_handle, flags, size * sizeof(scalar_t)),
c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
/* allocator */ nullptr));
THManagedMapAllocator::makeDataPtr(manager_handle, object_handle, flags, size),
/*allocator=*/nullptr,
/*resizable=*/false)
.release());
END_HANDLE_TH_ERRORS
}
static THWStorage* THPStorage_(newFdStorage)(ptrdiff_t size)
static c10::StorageImpl* THPStorage_(newFdStorage)(ptrdiff_t size)
{
int flags = at::ALLOCATOR_MAPPED_SHAREDMEM |
at::ALLOCATOR_MAPPED_EXCLUSIVE |
@ -136,7 +147,13 @@ static THWStorage* THPStorage_(newFdStorage)(ptrdiff_t size)
at::ALLOCATOR_MAPPED_UNLINK;
std::string handle = at::NewProcessWideShmHandle();
auto sptr = at::MapAllocator::makeDataPtr(handle.c_str(), flags, size * sizeof(scalar_t), nullptr);
return THWStorage_(newWithDataAndAllocator)(std::move(sptr), size, /* allocator */ nullptr);
return c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
std::move(sptr),
/*allocator=*/nullptr,
/*resizable=*/false)
.release();
}
static PyObject * THPStorage_(pyNewFdStorage)(PyObject *_unused, PyObject *args)
@ -155,21 +172,19 @@ static PyObject * THPStorage_(shareFd)(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
THWStorage *storage = self->cdata;
c10::StorageImpl *storage = self->cdata;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
at::MapAllocator *ctx;
// Storage is already in shared memory, just return a handle
if ((ctx = at::MapAllocator::fromDataPtr(storage->data_ptr()))) {
// done
} else {
THWStoragePtr new_storage(
THPStorage_(newFdStorage)(storage->nbytes() / sizeof(scalar_t)));
at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage.get(), /*retain=*/true);
c10::StorageImpl* new_storage = THPStorage_(newFdStorage)(storage->nbytes());
at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage, /*retain=*/true);
at::Storage _self_aten = torch::createStorage(_self);
storage_copy(new_storage_aten, _self_aten);
THWStorage_(swap)(storage, new_storage);
std::swap(*storage, *new_storage);
ctx = at::MapAllocator::fromDataPtr(storage->data_ptr());
AT_ASSERT(ctx);
}
@ -212,10 +227,13 @@ static PyObject * THPStorage_(newSharedFd)(PyObject *_unused, PyObject *args)
at::ALLOCATOR_MAPPED_KEEPFD |
at::ALLOCATOR_MAPPED_FROMFD;
return THPStorage_(New)(
THWStorage_(newWithDataAndAllocator)(
// TODO: Maybe we should read out the scalar_t size and use it for size
at::MapAllocator::makeDataPtr(at::WITH_FD, "", fd, flags, size * sizeof(scalar_t), nullptr),
size, /* allocator */ nullptr));
c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
size,
at::MapAllocator::makeDataPtr(at::WITH_FD, "", fd, flags, size, nullptr),
/*allocator=*/nullptr,
/*resizable=*/false)
.release());
END_HANDLE_TH_ERRORS
}
@ -225,7 +243,7 @@ static PyObject * THPStorage_(shareCuda)(PyObject *_self, PyObject *noargs)
{
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
THWStorage *storage = self->cdata;
c10::StorageImpl *storage = self->cdata;
if (storage->received_cuda()) {
AT_ERROR(
@ -471,10 +489,13 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
},
at::Device(at::DeviceType::CUDA, cur_device));
THWStoragePtr base(THWStorage_(newWithDataAndAllocator)(
LIBRARY_STATE
auto base = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
storage_size,
std::move(data_ptr),
storage_size, /* allocator */ nullptr));
/*allocator=*/nullptr,
/*resizable=*/false);
base->set_resizable(false);
base->set_received_cuda(true);
@ -483,15 +504,15 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
}
#endif
// Returns an object that holds a "weak" pointer to the THStorage. This
// pointer keeps the THStorage struct live, but does not retain the data
// Returns an object that holds a "weak" pointer to the c10::StorageImpl. This
// pointer keeps the c10::StorageImpl struct live, but does not retain the data
// pointer.
//
// NB: This does NOT preserve object identity when you call it multiple times
static PyObject * THPStorage_(weakRef)(PyObject *_self, PyObject *args) {
HANDLE_TH_ERRORS
auto self = (THPStorage*)_self;
THStorage* storage = self->cdata;
c10::StorageImpl* storage = self->cdata;
return PyLong_FromVoidPtr(c10::raw::intrusive_ptr::make_weak(storage));
END_HANDLE_TH_ERRORS
}
@ -501,7 +522,7 @@ PyObject * THPStorage_(newWithWeakPtr)(PyObject *_unused, PyObject *arg)
HANDLE_TH_ERRORS
THPUtils_assert(THPUtils_checkLong(arg),
"_new_with_weak_ptr(): arg must be an 'int'");
THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
if (auto* storage = c10::raw::weak_intrusive_ptr::lock(weak_storage)) {
return THPStorage_(New)(storage);
}
@ -517,7 +538,7 @@ PyObject * THPStorage_(freeWeakRef)(PyObject *_unused, PyObject *arg)
}
THPUtils_assert(THPUtils_checkLong(arg),
"_free_weak_ref(): arg must be an 'int'");
THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
c10::raw::weak_intrusive_ptr::decref(weak_storage);
Py_RETURN_NONE;
@ -528,7 +549,7 @@ PyObject * THPStorage_(expired)(PyObject *_unused, PyObject *arg)
{
HANDLE_TH_ERRORS
THPUtils_assert(THPUtils_checkLong(arg), "_expired(): arg must be an 'int'");
THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
return PyBool_FromLong(c10::raw::weak_intrusive_ptr::use_count(weak_storage) == 0);
END_HANDLE_TH_ERRORS
}
@ -539,7 +560,7 @@ PyObject * THPStorage_(sharedFd)(PyObject *_self, PyObject *noargs)
auto self = (THPStorage*)_self;
at::MapAllocator *ctx = nullptr;
#ifndef THC_GENERIC_FILE
THWStorage *storage = self->cdata;
c10::StorageImpl *storage = self->cdata;
ctx = at::MapAllocator::fromDataPtr(storage->data_ptr());
#endif

View File

@ -4,13 +4,15 @@
#ifdef THC_GENERIC_FILE
#include <c10/cuda/CUDAGuard.h>
#else
#include <c10/core/CPUAllocator.h>
#endif
// save_save is necessary since the old eager format saved storages as
// [size + data], but the v1.5 eager format removes this since size is saved in
// the filesize.
template <class io>
void THPStorage_(writeFileRaw)(THWStorage *self, io fd, bool save_size, uint64_t element_size)
void THPStorage_(writeFileRaw)(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size)
{
#ifdef THC_GENERIC_FILE
c10::cuda::CUDAGuard guard(self->device());
@ -84,11 +86,11 @@ void THPStorage_(writeFileRaw)(THWStorage *self, io fd, bool save_size, uint64_t
}
}
template void THPStorage_(writeFileRaw<int>)(THWStorage *self, int fd, bool save_size, uint64_t element_size);
template void THPStorage_(writeFileRaw<PyObject*>)(THWStorage *self, PyObject* fd, bool save_size, uint64_t element_size);
template void THPStorage_(writeFileRaw<int>)(c10::StorageImpl *self, int fd, bool save_size, uint64_t element_size);
template void THPStorage_(writeFileRaw<PyObject*>)(c10::StorageImpl *self, PyObject* fd, bool save_size, uint64_t element_size);
template <class io>
THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t element_size)
c10::StorageImpl * THPStorage_(readFileRaw)(io file, c10::StorageImpl *_storage, uint64_t element_size)
{
#ifdef THC_GENERIC_FILE
c10::cuda::OptionalCUDAGuard guard;
@ -111,9 +113,17 @@ THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t el
torch::utils::THP_decodeInt64Buffer(
&nbytes, (const uint8_t*)&nsize, torch::utils::THP_nativeByteOrder(), 1);
}
THWStoragePtr storage;
c10::intrusive_ptr<at::StorageImpl> storage;
if (_storage == nullptr) {
storage = THWStorage_(newWithSize)(LIBRARY_STATE nbytes);
storage = c10::make_intrusive<at::StorageImpl>(
c10::StorageImpl::use_byte_size_t(),
nbytes,
#if defined(THC_GENERIC_FILE)
c10::cuda::CUDACachingAllocator::get(),
#else
c10::GetDefaultCPUAllocator(),
#endif
/*resizable=*/true);
} else {
int64_t _storage_nbytes = _storage->nbytes();
THPUtils_assert(
@ -121,7 +131,7 @@ THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t el
"storage has wrong byte size: expected %ld got %ld",
nbytes,
_storage_nbytes);
storage = _storage;
storage = c10::intrusive_ptr<at::StorageImpl>::reclaim(_storage);
}
#ifndef THC_GENERIC_FILE
@ -176,7 +186,7 @@ THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t el
return storage.release();
}
template THWStorage* THPStorage_(readFileRaw<int>)(int fd, THWStorage* storage, uint64_t element_size);
template THWStorage* THPStorage_(readFileRaw<PyObject*>)(PyObject* fd, THWStorage* storage, uint64_t element_size);
template c10::StorageImpl* THPStorage_(readFileRaw<int>)(int fd, c10::StorageImpl* storage, uint64_t element_size);
template c10::StorageImpl* THPStorage_(readFileRaw<PyObject*>)(PyObject* fd, c10::StorageImpl* storage, uint64_t element_size);
#endif

View File

@ -3,9 +3,9 @@
#else
template <class io>
void THPStorage_(writeFileRaw)(THWStorage *self, io fd, bool save_size, uint64_t element_size);
void THPStorage_(writeFileRaw)(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size);
template <class io>
THWStorage * THPStorage_(readFileRaw)(io fd, THWStorage *storage, uint64_t element_size);
c10::StorageImpl * THPStorage_(readFileRaw)(io fd, c10::StorageImpl *storage, uint64_t element_size);
#endif

View File

@ -11,7 +11,6 @@
struct THPStorage;
struct THSPTensor;
typedef class THPPointer<THWStorage> THWStoragePtr;
typedef class THPPointer<THWTensor> THWTensorPtr;
typedef class THPPointer<THPStorage> THPStoragePtr;

View File

@ -198,6 +198,26 @@ void storage_copy(at::Storage dst, at::Storage src, bool non_blocking) {
dst_t.copy_(src_t, non_blocking);
}
void storage_fill(at::Storage self, uint8_t value) {
auto options = c10::TensorOptions().device(self.device()).dtype(at::kByte);
auto self_t = at::empty({0}, {}, options).set_(self);
self_t.fill_(value);
}
void storage_set(at::Storage self, ptrdiff_t idx, uint8_t value) {
TORCH_CHECK((idx >= 0) && (idx < self.nbytes()), "out of bounds");
auto options = c10::TensorOptions().device(self.device()).dtype(at::kByte);
auto self_t = at::empty({0}, {}, options).set_(self);
self_t[idx].fill_(value);
}
uint8_t storage_get(at::Storage self, ptrdiff_t idx) {
TORCH_CHECK((idx >= 0) && (idx < self.nbytes()), "out of bounds");
auto options = c10::TensorOptions().device(self.device()).dtype(at::kByte);
auto self_t = at::empty({0}, {}, options).set_(self);
return self_t[idx].item<uint8_t>();
}
template class THPPointer<THPStorage>;
namespace torch { namespace gdb {

View File

@ -138,7 +138,6 @@ void THPUtils_addPyMethodDefs(std::vector<PyMethodDef>& vector, PyMethodDef* met
int THPUtils_getCallable(PyObject *arg, PyObject **result);
#define THWStoragePtr TH_CONCAT_3(TH,Real,StoragePtr)
#define THWTensorPtr TH_CONCAT_3(TH,Real,TensorPtr)
#define THPStoragePtr TH_CONCAT_3(THP,Real,StoragePtr)
#define THPTensorPtr TH_CONCAT_3(THP,Real,TensorPtr)
@ -181,5 +180,8 @@ std::vector<c10::optional<at::cuda::CUDAStream>> THPUtils_PySequence_to_CUDAStre
#endif
void storage_copy(at::Storage dst, at::Storage src, bool non_blocking=false);
void storage_fill(at::Storage self, uint8_t value);
void storage_set(at::Storage self, ptrdiff_t idx, uint8_t value);
uint8_t storage_get(at::Storage self, ptrdiff_t idx);
#endif