Remove TH/THC Storage (#68556)

Summary: Fixes https://github.com/pytorch/pytorch/issues/67852 cc ezyang bhosmer smessmer ljk53 bdhirsh Pull Request resolved: https://github.com/pytorch/pytorch/pull/68556 Reviewed By: ejguan Differential Revision: D32652758 Pulled By: ngimel fbshipit-source-id: 170956fca112606f9008abe09b92c6ddc411be09
2025-10-20 21:14:14 +08:00 · 2021-11-29 12:49:57 -08:00
parent f5fa91ba2e
commit d9e7d85390
52 changed files with 254 additions and 702 deletions
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -349,7 +349,6 @@ filegroup(
    name = "th_srcs",
    srcs = [
        "aten/src/TH/THGeneral.cpp",
-        "aten/src/TH/THStorageFunctions.cpp",
        "aten/src/TH/THTensor.cpp",
    ],
 )
@ -385,8 +384,6 @@ filegroup(
        "aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp",
        "aten/src/ATen/native/sparse/cuda/SparseBlas.cpp",
        "aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp",
-        "aten/src/THC/THCStorage.cpp",
-        "aten/src/THC/THCTensor.cpp",
    ],
 )

@ -396,7 +393,6 @@ filegroup(
        "aten/src/THC/THCReduceApplyUtils.cu.cc",
        "aten/src/THC/THCSleep.cu.cc",
        "aten/src/THC/THCSortUtils.cu.cc",
-        "aten/src/THC/THCStorage.cu.cc",
        "aten/src/THC/THCTensor.cu.cc",
        "aten/src/THC/THCTensorCopy.cu.cc",
        "aten/src/THC/THCTensorMathScan.cu.cc",
--- a/aten/src/ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.h
+++ b/aten/src/ATen/hip/impl/HIPCachingAllocatorMasqueradingAsCUDA.h
@ -10,7 +10,7 @@ class DataPtr;
 namespace hip {
 namespace HIPCachingAllocatorMasqueradingAsCUDA {

-Allocator* get();
+C10_HIP_API Allocator* get();
 C10_HIP_API void recordStreamMasqueradingAsCUDA(const DataPtr& ptr, HIPStreamMasqueradingAsCUDA stream);

 } // namespace HIPCachingAllocatorMasqueradingAsCUDA
--- a/aten/src/README.md
+++ b/aten/src/README.md
@ -17,7 +17,7 @@ multiple variants of the library, summarized here:
 PyTorch employs reference counting in order to permit tensors to provide
 differing views on a common underlying storage.  For example, when you call
 view() on a Tensor, a new THTensor is allocated with differing dimensions,
-but it shares the same THStorage with the original tensor.
+but it shares the same c10::StorageImpl with the original tensor.

 Unfortunately, this means we are in the business of manually tracking reference
 counts inside our C library code.  Fortunately, for most of our library code implementing
@ -63,9 +63,9 @@ of freeing it.  If that function holds on to a pointer to the object, it
 will `retain` it itself.

 ```
-  THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel);
+  THByteStorage *inferred_size = THByteStorage_newInferSize(size, numel);
  THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL);
-  THLongStorage_free(inferred_size);
+  c10::raw::intrusive_ptr::decref(inferred_size);
 ```

 Sometimes, you have a tensor in hand which you'd like to use directly, but
--- a/aten/src/TH/CMakeLists.txt
+++ b/aten/src/TH/CMakeLists.txt
@ -1,11 +1,10 @@
 set(Aten_TH_AVX_extra_src)

 set(hdr
-  THGeneral.h THHalf.h THStorage.h THStorageFunctions.h THTensor.h)
+  THGeneral.h THHalf.h THTensor.h)

 set(ATen_TH_SRCS
  ${CMAKE_CURRENT_SOURCE_DIR}/THGeneral.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/THTensor.cpp
  )
 # Remember that PARENT_SCOPE variables are not in the current scope
@ -35,17 +34,12 @@ install(FILES
  TH.h
  ${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h
  THGenerateByteType.h
-  THStorage.h
-  THStorageFunctions.h
  THTensor.h
  THHalf.h
  THTensor.hpp
-  THStorageFunctions.hpp
  DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/TH")

 install(FILES
-  generic/THStorage.cpp
-  generic/THStorage.h
  generic/THTensor.cpp
  generic/THTensor.h
  # See Note [TH abstraction violation]
--- a/aten/src/TH/TH.h
+++ b/aten/src/TH/TH.h
@ -2,8 +2,6 @@
 #define TH_INC

 #include <TH/THGeneral.h>
-
-#include <TH/THStorageFunctions.h>
 #include <TH/THTensor.h>

 #endif
--- a/aten/src/TH/THStorage.h
+++ b/aten/src/TH/THStorage.h
@ -1,4 +0,0 @@
-#pragma once
-#include <TH/THStorageFunctions.h>
-
-// Compatibility header. Use THStorageFunctions.h instead if you need this.
--- a/aten/src/TH/THStorageFunctions.cpp
+++ b/aten/src/TH/THStorageFunctions.cpp
@ -1,39 +0,0 @@
-#include <climits>
-#include <c10/util/intrusive_ptr.h>
-
-#include <TH/THStorageFunctions.hpp>
-
-// NOLINTNEXTLINE(bugprone-suspicious-include)
-#include <TH/generic/THStorage.cpp>
-#include <TH/THGenerateByteType.h>
-
-#include <ATen/native/Resize.h>
-
-THStorage* THStorage_new() {
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-                           0,
-                           c10::GetDefaultCPUAllocator(),
-                           true)
-                           .release();
-  return storage;
-}
-
-// Free a non-weak pointer to THStorage
-void THStorage_free(THStorage* storage) {
-  if (!storage) {
-    return;
-  }
-  c10::raw::intrusive_ptr::decref(storage);
-}
-
-void THStorage_retain(THStorage *storage)
-{
-  if (storage) {
-    c10::raw::intrusive_ptr::incref(storage);
-  }
-}
-
-void THStorage_resizeBytes(THStorage* storage, ptrdiff_t size_bytes) {
-  at::native::resize_bytes_cpu(storage, size_bytes);
-}
--- a/aten/src/TH/THStorageFunctions.h
+++ b/aten/src/TH/THStorageFunctions.h
@ -1,11 +0,0 @@
-#pragma once
-
-#include <TH/THGeneral.h>
-
-#define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)
-
-#include <TH/generic/THStorage.h>
-#include <TH/THGenerateByteType.h>
-
-// This exists to have a data-type independent way of freeing (necessary for THPPointer).
-TH_API void THStorage_free(THStorage *storage);
--- a/aten/src/TH/THStorageFunctions.hpp
+++ b/aten/src/TH/THStorageFunctions.hpp
@ -1,38 +0,0 @@
-#pragma once
-
-// STOP!!! Thinking of including this header directly?  Please
-// read Note [TH abstraction violation]
-
-#include <c10/core/Storage.h>
-#include <c10/core/StorageImpl.h>
-#include <TH/THStorageFunctions.h>
-
-#include <c10/core/ScalarType.h>
-#include <c10/core/ScalarTypeToTypeMeta.h>
-
-// Note [Weak references for intrusive refcounting]
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-// Here's the scheme:
-//
-//  - refcount == number of strong references to the object
-//    weakcount == number of weak references to the object,
-//      plus one more if refcount > 0
-//
-//  - THStorage stays live as long as there are any strong
-//    or weak pointers to it (weakcount > 0, since strong
-//    references count as a +1 to weakcount)
-//
-//  - finalizers are called and data_ptr is deallocated when refcount == 0
-//
-//  - Once refcount == 0, it can never again be > 0 (the transition
-//    from > 0 to == 0 is monotonic)
-//
-//  - When you access THStorage via a weak pointer, you must
-//    atomically increment the use count, if it is greater than 0.
-//    If it is not, you must report that the storage is dead.
-//
-
-TH_CPP_API THStorage* THStorage_new();
-
-TH_API void THStorage_retain(THStorage *storage);
-TH_API void THStorage_resizeBytes(THStorage* storage, ptrdiff_t size_bytes);
--- a/aten/src/TH/THTensor.cpp
+++ b/aten/src/TH/THTensor.cpp
@ -16,7 +16,7 @@ void THTensor_free(THTensor *self)
  c10::raw::intrusive_ptr::decref(self);
 }

-void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_) {
+void THTensor_setStorage(THTensor *self, c10::StorageImpl *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_) {
  c10::raw::intrusive_ptr::incref(storage_);
  THTensor_wrap(self).set_(at::Storage(c10::intrusive_ptr<at::StorageImpl>::reclaim(storage_)), storageOffset_, size_, stride_);
 }
--- a/aten/src/TH/THTensor.h
+++ b/aten/src/TH/THTensor.h
@ -1,7 +1,7 @@
 #ifndef TH_TENSOR_INC
 #define TH_TENSOR_INC

-#include <TH/THStorageFunctions.h>
+#include <TH/THGeneral.h>

 #define THTensor_(NAME)   TH_CONCAT_4(TH,Real,Tensor_,NAME)

--- a/aten/src/TH/THTensor.hpp
+++ b/aten/src/TH/THTensor.hpp
@ -4,7 +4,7 @@
 // read Note [TH abstraction violation]

 #include <TH/THTensor.h>
-#include <TH/THStorageFunctions.hpp>
+#include <c10/core/StorageImpl.h>

 #include <atomic>
 #include <ATen/ATen.h>
@ -18,4 +18,4 @@ inline at::Tensor THTensor_wrap(THTensor* tensor) {

 TH_API void THTensor_free(THTensor *self);

-TH_CPP_API void THTensor_setStorage(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_);
+TH_CPP_API void THTensor_setStorage(THTensor *self, c10::StorageImpl *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_);
--- a/aten/src/TH/generic/THStorage.cpp
+++ b/aten/src/TH/generic/THStorage.cpp
@ -1,129 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "TH/generic/THStorage.cpp"
-#else
-
-#include <ATen/MapAllocator.h>
-#include <c10/core/CPUAllocator.h>
-#include <c10/util/irange.h>
-
-#include <new>
-
-
-THStorage* THStorage_(new)(void)
-{
-  return THStorage_new();
-}
-
-THStorage* THStorage_(newWithSize)(ptrdiff_t size)
-{
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-#ifdef THQUANTIZED
-                           size * sizeof(quantized_t),
-#else
-                           size * sizeof(scalar_t),
-#endif
-                           c10::GetDefaultCPUAllocator(),
-                           true)
-                           .release();
-  return storage;
-}
-
-THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
-                                        at::Allocator *allocator)
-{
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-#ifdef THQUANTIZED
-                           size * sizeof(quantized_t),
-#else
-                           size * sizeof(scalar_t),
-#endif
-                           allocator,
-                           true)
-                           .release();
-  return storage;
-}
-
-
-THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags)
-{
-  size_t actual_size = -1;
-  THStorage* storage =
-      c10::make_intrusive<at::StorageImpl>(
-          c10::StorageImpl::use_byte_size_t(),
-          size * sizeof(scalar_t),
-          at::MapAllocator::makeDataPtr(
-              filename, flags, size * sizeof(scalar_t), &actual_size),
-          /* allocator */ nullptr,
-          false)
-          .release();
-
-  if (size <= 0) {
-    storage->set_nbytes(actual_size);
-  }
-
-  return storage;
-}
-
-void THStorage_(retain)(THStorage *storage)
-{
-  THStorage_retain(storage);
-}
-
-void THStorage_(free)(THStorage *storage)
-{
-  THStorage_free(storage);
-}
-
-THStorage* THStorage_(newWithDataAndAllocator)(at::DataPtr&& data, ptrdiff_t size,
-                                               at::Allocator* allocator) {
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-#ifdef THQUANTIZED
-                           size * sizeof(quantized_t),
-#else
-                           size * sizeof(scalar_t),
-#endif
-                           std::move(data),
-                           allocator,
-                           allocator != nullptr)
-                           .release();
-  return storage;
-}
-
-void THStorage_(resizeBytes)(THStorage* storage, ptrdiff_t size_bytes) {
-  return THStorage_resizeBytes(storage, size_bytes);
-}
-
-void THStorage_(fill)(THStorage *storage, scalar_t value)
-{
-  const auto type_meta = caffe2::TypeMeta::Make<scalar_t>();
-  const size_t numel = storage->nbytes() / type_meta.itemsize();
-  for (const auto i : c10::irange(numel)) {
-    storage->data<scalar_t>()[i] = value;
-  }
-}
-
-void THStorage_(set)(THStorage *self, ptrdiff_t idx, scalar_t value)
-{
-  const auto type_meta = caffe2::TypeMeta::Make<scalar_t>();
-  const auto numel = static_cast<int64_t>(self->nbytes() / type_meta.itemsize());
-  THArgCheck((idx >= 0) && (idx < numel), 2, "out of bounds");
-  self->data<scalar_t>()[idx] = value;
-}
-
-scalar_t THStorage_(get)(const THStorage *self, ptrdiff_t idx)
-{
-  const auto type_meta = caffe2::TypeMeta::Make<scalar_t>();
-  const auto numel = static_cast<int64_t>(self->nbytes() / type_meta.itemsize());
-  THArgCheck((idx >= 0) && (idx < numel), 2, "out of bounds");
-  return self->data<scalar_t>()[idx];
-}
-
-void THStorage_(swap)(THStorage *storage1, THStorage *storage2)
-{
-  std::swap(*storage1, *storage2);
-}
-
-#endif
--- a/aten/src/TH/generic/THStorage.h
+++ b/aten/src/TH/generic/THStorage.h
@ -1,54 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "TH/generic/THStorage.h"
-#else
-
-#include <c10/core/Allocator.h>
-#include <c10/core/StorageImpl.h>
-
-/* on pourrait avoir un liste chainee
-   qui initialise math, lab structures (or more).
-   mouais -- complique.
-
-   Pb: THMapStorage is kind of a class
-   THLab_()... comment je m'en sors?
-
-   en template, faudrait que je les instancie toutes!!! oh boy!
-   Et comment je sais que c'est pour Cuda? Le type float est le meme dans les <>
-
-   au bout du compte, ca serait sur des pointeurs float/double... etc... = facile.
-   primitives??
- */
-
-// Struct definition is moved to THStorage.hpp (so this file stays C compatible)
-
-#define THStorage at::StorageImpl
-
-// These used to be distinct types; for some measure of backwards compatibility and documentation
-// alias these to the single THStorage type.
-#define THByteStorage THStorage
-
-/* slow access -- checks everything */
-TH_API void THStorage_(set)(THStorage*, ptrdiff_t, scalar_t);
-TH_API scalar_t THStorage_(get)(const THStorage*, ptrdiff_t);
-
-TH_API THStorage* THStorage_(new)(void);
-TH_API THStorage* THStorage_(newWithSize)(ptrdiff_t size);
-TH_API THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags);
-
-TH_API THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
-                                               c10::Allocator* allocator);
-TH_API THStorage* THStorage_(newWithDataAndAllocator)(
-    at::DataPtr&& data, ptrdiff_t size, at::Allocator* allocator);
-
-/* should not differ with API */
-TH_API void THStorage_(setFlag)(THStorage *storage, const char flag);
-TH_API void THStorage_(clearFlag)(THStorage *storage, const char flag);
-TH_API void THStorage_(retain)(THStorage *storage);
-TH_API void THStorage_(swap)(THStorage *storage1, THStorage *storage2);
-
-/* might differ with other API (like CUDA) */
-TH_API void THStorage_(free)(THStorage *storage);
-TH_API void THStorage_(resizeBytes)(THStorage* storage, ptrdiff_t size_bytes);
-TH_API void THStorage_(fill)(THStorage *storage, scalar_t value);
-
-#endif
--- a/aten/src/TH/generic/THTensor.cpp
+++ b/aten/src/TH/generic/THTensor.cpp
@ -10,7 +10,7 @@

 /**** creation methods ****/

-THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset,
+THTensor *THTensor_(newWithStorage1d)(c10::StorageImpl *storage, ptrdiff_t storageOffset,
                               int64_t size0, int64_t stride0)
 {
  c10::raw::intrusive_ptr::incref(storage);
--- a/aten/src/TH/generic/THTensor.h
+++ b/aten/src/TH/generic/THTensor.h
@ -24,7 +24,7 @@
 #define THComplexDoubleTensor THTensor

 /**** creation methods ****/
-TH_API THTensor *THTensor_(newWithStorage1d)(THStorage *storage_, ptrdiff_t storageOffset_,
+TH_API THTensor *THTensor_(newWithStorage1d)(c10::StorageImpl *storage_, ptrdiff_t storageOffset_,
                                int64_t size0_, int64_t stride0_);

 #endif
--- a/aten/src/THC/CMakeLists.txt
+++ b/aten/src/THC/CMakeLists.txt
@ -3,11 +3,9 @@ set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE}
 PARENT_SCOPE)

 set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
-  ${CMAKE_CURRENT_SOURCE_DIR}/THCStorage.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/THCTensor.cpp

  ${CMAKE_CURRENT_SOURCE_DIR}/THCSleep.cu
-  ${CMAKE_CURRENT_SOURCE_DIR}/THCStorage.cu
  PARENT_SCOPE)

 install(FILES
@ -15,19 +13,14 @@ install(FILES
          THCGeneral.h
          THCGeneral.hpp
          THCSleep.h
-          THCStorage.h
          THCTensor.h
          THCAtomics.cuh
          THCGenerateByteType.h
          # See Note [TH abstraction violation]
          THCTensor.hpp
-          THCStorage.hpp
          DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC")

 install(FILES
-          generic/THCStorage.cpp
-          generic/THCStorage.cu
-          generic/THCStorage.h
          generic/THCTensor.cpp
          generic/THCTensor.h
          DESTINATION "${ATEN_INSTALL_INCLUDE_SUBDIR}/THC/generic")
--- a/aten/src/THC/THC.h
+++ b/aten/src/THC/THC.h
@ -4,8 +4,6 @@
 #include <THC/THCGeneral.h>
 #include <c10/cuda/CUDACachingAllocator.h>
 #include <THC/THCSleep.h>
-#include <THC/THCStorage.h>
-
 #include <THC/THCTensor.h>

 #endif
--- a/aten/src/THC/THCStorage.cpp
+++ b/aten/src/THC/THCStorage.cpp
@ -1,38 +0,0 @@
-#include <THC/THCStorage.hpp>
-#include <THC/THCGeneral.h>
-
-#include <TH/THHalf.h>
-
-#include <new>
-#include <c10/cuda/CUDACachingAllocator.h>
-
-#include <THC/generic/THCStorage.cpp>
-#include <THC/THCGenerateByteType.h>
-
-#include <c10/util/intrusive_ptr.h>
-#include <ATen/native/cuda/Resize.h>
-
-void THCStorage_resizeBytes(
-    THCState* state,
-    THCStorage* self,
-    ptrdiff_t size_bytes_i) {
-  TORCH_CHECK(!c10::overflows<size_t>(size_bytes_i),
-              "Requested storage size (", size_bytes_i,
-              ") cannot be represented as a size_t");
-  const auto size_bytes = static_cast<size_t>(size_bytes_i);
-  at::native::resize_bytes_cuda(self, size_bytes);
-}
-
-int THCStorage_getDevice(THCState* state, const THCStorage* storage) {
-  return storage->device().index();
-}
-
-THCStorage* THCStorage_new(THCState* state) {
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-                           0,
-                           c10::cuda::CUDACachingAllocator::get(),
-                           true)
-                           .release();
-  return storage;
-}
--- a/aten/src/THC/THCStorage.cu
+++ b/aten/src/THC/THCStorage.cu
@ -1,11 +0,0 @@
-#include <THC/THCStorage.hpp>
-
-#include <ATen/cuda/ThrustAllocator.h>
-#include <thrust/device_ptr.h>
-#include <thrust/fill.h>
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 7000) || defined(USE_ROCM)
-#include <thrust/system/cuda/execution_policy.h>
-#endif
-
-#include <THC/generic/THCStorage.cu>
-#include <THC/THCGenerateByteType.h>
--- a/aten/src/THC/THCStorage.h
+++ b/aten/src/THC/THCStorage.h
@ -1,12 +0,0 @@
-#ifndef THC_STORAGE_INC
-#define THC_STORAGE_INC
-
-#include <TH/THStorageFunctions.h>
-#include <THC/THCGeneral.h>
-
-#define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME)
-
-#include <THC/generic/THCStorage.h>
-#include <THC/THCGenerateByteType.h>
-
-#endif
--- a/aten/src/THC/THCStorage.hpp
+++ b/aten/src/THC/THCStorage.hpp
@ -1,32 +0,0 @@
-#pragma once
-
-// STOP!!! Thinking of including this header directly?  Please
-// read Note [TH abstraction violation]
-
-#include <THC/THCStorage.h>
-// Should work with THStorageClass
-#include <TH/THStorageFunctions.hpp>
-
-#include <c10/core/ScalarType.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <cuda_fp16.h>
-
-TORCH_CUDA_CU_API THCStorage* THCStorage_new(THCState* state);
-
-TORCH_CUDA_CU_API void THCStorage_retain(THCState* state, THCStorage* storage);
-
-TORCH_CUDA_CU_API void THCStorage_resizeBytes(
-    THCState* state,
-    THCStorage* storage,
-    ptrdiff_t size_bytes);
-TORCH_CUDA_CU_API int THCStorage_getDevice(
-    THCState* state,
-    const THCStorage* storage);
-
-TORCH_CUDA_CU_API THCStorage* THCStorage_newWithDataAndAllocator(
-    THCState* state,
-    at::DataPtr&& data,
-    ptrdiff_t size,
-    at::Allocator* allocator);
--- a/aten/src/THC/THCTensor.cpp
+++ b/aten/src/THC/THCTensor.cpp
@ -6,7 +6,7 @@
 #include <THC/generic/THCTensor.cpp>
 #include <THC/THCGenerateByteType.h>

-void THCTensor_setStorage(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_)
+void THCTensor_setStorage(THCState *state, THCTensor *self, c10::StorageImpl *storage_, ptrdiff_t storageOffset_, at::IntArrayRef size_, at::IntArrayRef stride_)
 {
  c10::raw::intrusive_ptr::incref(storage_);
  THTensor_wrap(self).set_(at::Storage(c10::intrusive_ptr<at::StorageImpl>::reclaim(storage_)),
--- a/aten/src/THC/THCTensor.h
+++ b/aten/src/THC/THCTensor.h
@ -2,7 +2,6 @@
 #define THC_TENSOR_INC

 #include <TH/THTensor.h>
-#include <THC/THCStorage.h>
 #include <THC/THCGeneral.h>

 #define THCTensor_(NAME)   TH_CONCAT_4(TH,CReal,Tensor_,NAME)
--- a/aten/src/THC/THCTensor.hpp
+++ b/aten/src/THC/THCTensor.hpp
@ -5,7 +5,6 @@

 #include <THC/THCTensor.h>
 #include <TH/THTensor.hpp>
-#include <THC/THCStorage.hpp>
 #include <THC/THCGeneral.hpp>

 #include <ATen/ATen.h>
@ -13,7 +12,7 @@
 TORCH_CUDA_CU_API void THCTensor_setStorage(
    THCState* state,
    THCTensor* self,
-    THCStorage* storage_,
+    c10::StorageImpl* storage_,
    ptrdiff_t storageOffset_,
    at::IntArrayRef size_,
    at::IntArrayRef stride_);
--- a/aten/src/THC/generic/THCStorage.cpp
+++ b/aten/src/THC/generic/THCStorage.cpp
@ -1,86 +0,0 @@
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "THC/generic/THCStorage.cpp"
-#else
-
-#include <c10/util/intrusive_ptr.h>
-#include <c10/util/typeid.h>
-
-
-void THCStorage_(set)(THCState *state, THCStorage *self, ptrdiff_t index, scalar_t value)
-{
-  THArgCheck(
-      (index >= 0) && (index < static_cast<int64_t>(self->nbytes() / sizeof(scalar_t))),
-      2,
-      "index out of bounds");
-  cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
-  at::cuda::memcpy_and_sync(self->data<scalar_t>() + index, &value, sizeof(scalar_t),
-                              cudaMemcpyHostToDevice,
-                              stream);
-}
-
-scalar_t THCStorage_(get)(THCState *state, const THCStorage *self, ptrdiff_t index)
-{
-  THArgCheck(
-      (index >= 0) && (index < static_cast<int64_t>(self->nbytes() / sizeof(scalar_t))),
-      2,
-      "index out of bounds");
-  scalar_t value;
-  cudaStream_t stream = c10::cuda::getCurrentCUDAStream();
-  at::cuda::memcpy_and_sync(&value, self->data<scalar_t>() + index, sizeof(scalar_t),
-                                  cudaMemcpyDeviceToHost, stream);
-  return value;
-}
-
-THCStorage* THCStorage_(new)(THCState *state)
-{
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-                           0,
-                           c10::cuda::CUDACachingAllocator::get(),
-                           true)
-                           .release();
-  return storage;
-}
-
-THCStorage* THCStorage_(newWithSize)(THCState *state, ptrdiff_t size)
-{
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-                           size * sizeof(scalar_t),
-                           c10::cuda::CUDACachingAllocator::get(),
-                           true)
-                           .release();
-  return storage;
-}
-
-THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, ptrdiff_t size, int isShared)
-{
-  THError("not available yet for THCStorage");
-  return NULL;
-}
-
-THCStorage* THCStorage_(newWithDataAndAllocator)(
-    THCState* state,
-    at::DataPtr&& data,
-    ptrdiff_t size,
-    at::Allocator* allocator) {
-  THStorage* storage = c10::make_intrusive<at::StorageImpl>(
-                           c10::StorageImpl::use_byte_size_t(),
-                           size * sizeof(scalar_t),
-                           std::move(data),
-                           allocator,
-                           allocator != nullptr)
-                           .release();
-  return storage;
-}
-
-void THCStorage_(retain)(THCState *state, THCStorage *self)
-{
-  THStorage_retain(self);
-}
-
-void THCStorage_(free)(THCState *state, THCStorage *self)
-{
-  THStorage_free(self);
-}
-#endif
--- a/aten/src/THC/generic/THCStorage.cu
+++ b/aten/src/THC/generic/THCStorage.cu
@ -1,28 +0,0 @@
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "THC/generic/THCStorage.cu"
-#else
-
-void THCStorage_(fill)(THCState *state, THCStorage *self, scalar_t value)
-{
-  at::cuda::ThrustAllocator thrustAlloc;
-  thrust::device_ptr<scalar_t> self_data(self->data<scalar_t>());
-
-  thrust::fill(
-#if (defined(CUDA_VERSION) && CUDA_VERSION >= 7000) || defined(USE_ROCM)
-      thrust::cuda::par(thrustAlloc).on(c10::cuda::getCurrentCUDAStream()),
-#endif
-      self_data,
-      self_data + (self->nbytes() / sizeof(scalar_t)),
-      value);
-}
-
-void THCStorage_(
-    resizeBytes)(THCState* state, THCStorage* self, ptrdiff_t size_bytes) {
-  THCStorage_resizeBytes(state, self, size_bytes);
-}
-
-int THCStorage_(getDevice)(THCState* state, const THCStorage* storage) {
-  return THCStorage_getDevice(state, storage);
-}
-
-#endif
--- a/aten/src/THC/generic/THCStorage.h
+++ b/aten/src/THC/generic/THCStorage.h
@ -1,49 +0,0 @@
-#ifndef THC_GENERIC_FILE
-#define THC_GENERIC_FILE "THC/generic/THCStorage.h"
-#else
-
-#define THCStorage THStorage
-
-// These used to be distinct types; for some measure of backwards compatibility and documentation
-// alias these to the single THCStorage type.
-#define THCudaStorage                       THCStorage
-#define THCudaByteStorage                   THCStorage
-
-/* slow access -- checks everything */
-TORCH_CUDA_CU_API void THCStorage_(
-    set)(THCState* state, THCStorage*, ptrdiff_t, scalar_t);
-TORCH_CUDA_CU_API scalar_t
-    THCStorage_(get)(THCState* state, const THCStorage*, ptrdiff_t);
-
-TORCH_CUDA_CU_API THCStorage* THCStorage_(new)(THCState* state);
-TORCH_CUDA_CU_API THCStorage* THCStorage_(
-    newWithSize)(THCState* state, ptrdiff_t size);
-TORCH_CUDA_CU_API THCStorage* THCStorage_(newWithMapping)(
-    THCState* state,
-    const char* filename,
-    ptrdiff_t size,
-    int shared);
-
-TORCH_CUDA_CU_API THCStorage* THCStorage_(newWithDataAndAllocator)(
-    THCState* state,
-    at::DataPtr&& data,
-    ptrdiff_t size,
-    at::Allocator* allocator);
-
-TORCH_CUDA_CU_API void THCStorage_(
-    setFlag)(THCState* state, THCStorage* storage, const char flag);
-TORCH_CUDA_CU_API void THCStorage_(
-    clearFlag)(THCState* state, THCStorage* storage, const char flag);
-TORCH_CUDA_CU_API void THCStorage_(
-    retain)(THCState* state, THCStorage* storage);
-
-TORCH_CUDA_CU_API void THCStorage_(free)(THCState* state, THCStorage* storage);
-TORCH_CUDA_CU_API void THCStorage_(
-    resizeBytes)(THCState* state, THCStorage* storage, ptrdiff_t size_bytes);
-TORCH_CUDA_CU_API void THCStorage_(
-    fill)(THCState* state, THCStorage* storage, scalar_t value);
-
-TORCH_CUDA_CU_API int THCStorage_(
-    getDevice)(THCState* state, const THCStorage* storage);
-
-#endif
--- a/aten/src/THC/generic/THCTensor.cpp
+++ b/aten/src/THC/generic/THCTensor.cpp
@ -7,7 +7,7 @@

 /**** creation methods ****/

-THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset,
+THCTensor *THCTensor_(newWithStorage1d)(THCState *state, c10::StorageImpl *storage, ptrdiff_t storageOffset,
                               int64_t size0, int64_t stride0)
 {
  c10::raw::intrusive_ptr::incref(storage);
--- a/aten/src/THC/generic/THCTensor.h
+++ b/aten/src/THC/generic/THCTensor.h
@ -20,12 +20,12 @@
 #define THCudaComplexDoubleTensor   THCTensor

 /**** access methods ****/
-TORCH_CUDA_CU_API THCStorage* THCTensor_(
+TORCH_CUDA_CU_API c10::StorageImpl* THCTensor_(
    storage)(THCState* state, const THCTensor* self);
 /**** creation methods ****/
 TORCH_CUDA_CU_API THCTensor* THCTensor_(newWithStorage1d)(
    THCState* state,
-    THCStorage* storage_,
+    c10::StorageImpl* storage_,
    ptrdiff_t storageOffset_,
    int64_t size0_,
    int64_t stride0_);
--- a/c10/util/intrusive_ptr.h
+++ b/c10/util/intrusive_ptr.h
@ -66,7 +66,7 @@ class C10_API intrusive_ptr_target {
  //      plus one more if refcount > 0
  //    An invariant: refcount > 0  =>  weakcount > 0
  //
-  //  - THStorage stays live as long as there are any strong
+  //  - c10::StorageImpl stays live as long as there are any strong
  //    or weak pointers to it (weakcount > 0, since strong
  //    references count as a +1 to weakcount)
  //
@ -75,7 +75,7 @@ class C10_API intrusive_ptr_target {
  //  - Once refcount == 0, it can never again be > 0 (the transition
  //    from > 0 to == 0 is monotonic)
  //
-  //  - When you access THStorage via a weak pointer, you must
+  //  - When you access c10::StorageImpl via a weak pointer, you must
  //    atomically increment the use count, if it is greater than 0.
  //    If it is not, you must report that the storage is dead.
  //
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@ -1229,7 +1229,6 @@ aten_native_source_non_codegen_list = [
    "aten/src/ATen/native/sparse/SparseTensorMath.cpp",
    "aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp",
    "aten/src/TH/THGeneral.cpp",
-    "aten/src/TH/THStorageFunctions.cpp",
    "aten/src/TH/THTensor.cpp",
    "aten/src/ATen/native/utils/Factory.cpp",
    "aten/src/ATen/native/xnnpack/Activation.cpp",
--- a/torch/csrc/DynamicTypes.cpp
+++ b/torch/csrc/DynamicTypes.cpp
@ -116,7 +116,7 @@ PyObject* createPyObject(
  auto type = getPyTypeObject(storage, data_type);
  auto obj = THPObjectPtr(type->tp_alloc(type, 0));
  if (!obj) throw python_error();
-  ((THPVoidStorage*)obj.get())->cdata = (THVoidStorage *)at::Storage(/* copy */ storage).unsafeReleaseStorageImpl();
+  ((THPVoidStorage*)obj.get())->cdata = at::Storage(/* copy */ storage).unsafeReleaseStorageImpl();
  return obj.release();
 }

--- a/torch/csrc/PythonTypes.h
+++ b/torch/csrc/PythonTypes.h
@ -14,7 +14,7 @@ struct THPVoidTensor {

 struct THPVoidStorage {
  PyObject_HEAD
-  THVoidStorage *cdata;
+  c10::StorageImpl *cdata;
 };

 } // namespace torch
--- a/torch/csrc/README.md
+++ b/torch/csrc/README.md
@ -30,7 +30,8 @@ One important case where this assumption is important is when tracking
 the CUDA device a tensor is stored in: this information is stored
 solely in the storage, so if a storage is nullptr, we lose this information.

-Although storage is never nullptr, the data field of THStorage may be nullptr.  This
+Although storage is never nullptr, the data field of c10::StorageImpl may be
+nullptr.  This
 mostly occurs when we want to pre-allocate an output tensor struct, but then
 have it be resized and filled with data by some operator: there's no point in
 allocating data for it in this case!
--- a/torch/csrc/Storage.cpp
+++ b/torch/csrc/Storage.cpp
@ -4,12 +4,9 @@
 #endif
 #include <structmember.h>

-#define THP_HOST_HALF
-
 #include <TH/TH.h>
 // See Note [TH abstraction violation]
 //  - Used to get at the allocator associated with a storage
-#include <TH/THStorageFunctions.hpp>
 #include <libshm.h>
 #include <torch/csrc/THP.h>
 #include <torch/csrc/copy_utils.h>
@ -17,6 +14,7 @@
 #include <torch/csrc/CudaIPCTypes.h>
 #include <torch/csrc/Device.h>
 #include <torch/csrc/autograd/utils/wrap_outputs.h>
+#include <c10/core/CPUAllocator.h>

 #include <fmt/format.h>

@ -24,9 +22,11 @@
 #include <torch/csrc/generic/Storage.cpp>
 #include <TH/THGenerateByteType.h>

+#include <c10/util/intrusive_ptr.h>
+
 template<>
-void THPPointer<THStorage>::free() {
+void THPPointer<c10::StorageImpl>::free() {
  if (ptr) {
-    THStorage_free(ptr);
+    c10::raw::intrusive_ptr::decref(ptr);
  }
 }
--- a/torch/csrc/StorageDefs.h
+++ b/torch/csrc/StorageDefs.h
@ -1,5 +1,5 @@
 #pragma once
 struct THPStorage {
  PyObject_HEAD
-  THWStorage *cdata;
+  c10::StorageImpl *cdata;
 };
--- a/torch/csrc/THP.h
+++ b/torch/csrc/THP.h
@ -23,8 +23,6 @@
 #define LIBRARY_STATE_TYPE
 #define LIBRARY_STATE_TYPE_NOARGS

-#define THWStorage THStorage
-#define THWStorage_(NAME) THStorage_(NAME)
 #define THWTensor THTensor
 #define THWTensor_(NAME) THTensor_(NAME)

--- a/torch/csrc/Types.h
+++ b/torch/csrc/Types.h
@ -12,8 +12,6 @@ template <typename T> struct THPTypeInfo {};

 namespace torch {

-typedef THStorage THVoidStorage;
-
 typedef THTensor THVoidTensor;

 }  // namespace torch
--- a/torch/csrc/cuda/override_macros.h
+++ b/torch/csrc/cuda/override_macros.h
@ -1,12 +1,9 @@
 #include <torch/csrc/cuda/undef_macros.h>

-#define THWStoragePtr THCStoragePtr
 #define THPStoragePtr THCPStoragePtr
 #define THWTensorPtr THCTensorPtr
 #define THPTensorPtr THCPTensorPtr

-#define THWStorage THCStorage
-#define THWStorage_(NAME) THCStorage_(NAME)
 #define THWTensor THCTensor
 #define THWTensor_(NAME) THCTensor_(NAME)

@ -50,5 +47,3 @@

 #define THHostTensor TH_CONCAT_3(TH,Real,Tensor)
 #define THHostTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)
-#define THHostStorage TH_CONCAT_3(TH,Real,Storage)
-#define THHostStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)
--- a/torch/csrc/cuda/restore_macros.h
+++ b/torch/csrc/cuda/restore_macros.h
@ -11,7 +11,6 @@
 #define THPStorageClass TH_CONCAT_3(THP,Real,StorageClass)
 #define THPStorage_(NAME) TH_CONCAT_4(THP,Real,Storage_,NAME)

-#define THWStoragePtr TH_CONCAT_3(TH,Real,StoragePtr)
 #define THWTensorPtr  TH_CONCAT_3(TH,Real,TensorPtr)
 #define THPStoragePtr TH_CONCAT_3(THP,Real,StoragePtr)
 #define THPTensorPtr  TH_CONCAT_3(THP,Real,TensorPtr)
--- a/torch/csrc/cuda/undef_macros.h
+++ b/torch/csrc/cuda/undef_macros.h
@ -21,12 +21,9 @@
 #undef THPStorageClass
 #undef THPStorageType

-#undef THWStorage
-#undef THWStorage_
 #undef THWTensor
 #undef THWTensor_

-#undef THWStoragePtr
 #undef THPStoragePtr
 #undef THWTensorPtr
 #undef THPTensorPtr
@ -48,5 +45,3 @@

 #undef THHostTensor
 #undef THHostTensor_
-#undef THHostStorage
-#undef THHostStorage_
--- a/torch/csrc/cuda/utils.h
+++ b/torch/csrc/cuda/utils.h
@ -3,7 +3,6 @@

 #define THCPUtils_(NAME) TH_CONCAT_4(THCP,Real,Utils_,NAME)

-#define THCStoragePtr  TH_CONCAT_3(THC,Real,StoragePtr)
 #define THCTensorPtr   TH_CONCAT_3(THC,Real,TensorPtr)
 #define THCPStoragePtr TH_CONCAT_3(THCP,Real,StoragePtr)
 #define THCPTensorPtr  TH_CONCAT_3(THCP,Real,TensorPtr)
--- a/torch/csrc/generic/Storage.cpp
+++ b/torch/csrc/generic/Storage.cpp
@ -4,7 +4,7 @@

 PyObject *THPStorageClass = nullptr;

-PyObject * THPStorage_(New)(THWStorage *ptr)
+PyObject * THPStorage_(New)(c10::StorageImpl *ptr)
 {
  AT_ASSERT(ptr);
  PyTypeObject *type = (PyTypeObject *)THPStorageClass;
@ -12,25 +12,17 @@ PyObject * THPStorage_(New)(THWStorage *ptr)
  if (obj) {
    ((THPStorage *)obj)->cdata = ptr;
  } else {
-    THWStorage_(free)(LIBRARY_STATE ptr);
+    c10::raw::intrusive_ptr::decref(ptr);
  }
  return obj;
 }

 static void THPStorage_(dealloc)(THPStorage* self)
 {
-  THWStorage_(free)(LIBRARY_STATE self->cdata);
-  Py_TYPE(self)->tp_free((PyObject*)self);
+  if (self->cdata) {
+    c10::raw::intrusive_ptr::decref(self->cdata);
  }
-
-static THWStorage* THPStorage_(newWithAllocator)(int64_t size, at::Allocator* allocator)
-{
-#if defined(THC_GENERIC_FILE)
-  THPUtils_setError(THPStorageStr " does not support custom allocators");
-  return nullptr;
-#else
-  return THWStorage_(newWithAllocator)(LIBRARY_STATE size, allocator);
-#endif
+  Py_TYPE(self)->tp_free((PyObject*)self);
 }

 static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
@ -55,22 +47,28 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
    if (num_args == 0) {
      PyObject *cdata_ptr = PyDict_GetItemString(kwargs, "cdata");
      if (num_kwargs == 1 && cdata_ptr && THPUtils_checkLong(cdata_ptr)) {
-        THWStorage *ptr = (THWStorage*)PyLong_AsVoidPtr(cdata_ptr);
+        c10::StorageImpl *ptr = (c10::StorageImpl*)PyLong_AsVoidPtr(cdata_ptr);
        self->cdata = ptr;
        return (PyObject*)self.release();
      }
    }
    THPUtils_assert(num_kwargs == 0, THPStorageStr "(): invalid keyword arguments");
  }
+  if (allocator == nullptr) {
+#if defined(THC_GENERIC_FILE)
+      allocator = c10::cuda::CUDACachingAllocator::get();
+#else
+      allocator = c10::GetDefaultCPUAllocator();
+#endif
+  }

  // torch.Storage()
  if (num_args == 0) {
-    // NOLINTNEXTLINE(bugprone-branch-clone)
-    if (allocator) {
-      self->cdata = THPStorage_(newWithAllocator)(0, allocator);
-    } else {
-      self->cdata = THWStorage_(new)(LIBRARY_STATE_NOARGS);
-    }
+    self->cdata = c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      0,
+      allocator,
+      /*resizable=*/true).release();
    return (PyObject*)self.release();
  }

@ -79,12 +77,11 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
  // torch.Storage(size)
  if (num_args == 1 && THPUtils_checkLong(first_arg)) {
    int64_t size = THPUtils_unpackLong(first_arg);
-    // NOLINTNEXTLINE(bugprone-branch-clone)
-    if (allocator) {
-      self->cdata = THPStorage_(newWithAllocator)(size, allocator);
-    } else {
-      self->cdata = THWStorage_(newWithSize)(LIBRARY_STATE size);
-    }
+    self->cdata = c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      size,
+      allocator,
+      /*resizable=*/true).release();
    return (PyObject*)self.release();
  }

@ -93,7 +90,12 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
    Py_ssize_t length = PySequence_Length(first_arg);
    THPUtils_assert(length >= 0, "couldn't obtain the length of %s",
        THPUtils_typename(first_arg));
-    self->cdata = THWStorage_(newWithSize)(LIBRARY_STATE length);
+    self->cdata = c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      length,
+      allocator,
+      /*resizable=*/true)
+      .release();
    THPObjectPtr item;
    try {
      for (Py_ssize_t i = 0; i < length; i++) {
@ -104,7 +106,10 @@ static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObjec
        self->cdata->unsafe_data<scalar_t>()[i] = value;
 #else
        // TODO: this might be slow - consider batched updates?
-        THCStorage_(set)(LIBRARY_STATE self->cdata, i, value);
+        storage_set(
+          at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
+          i,
+          value);
 #endif
      }
    } catch (const std::exception &e) {
@ -147,7 +152,7 @@ static PyObject * THPStorage_(get)(THPStorage *self, PyObject *index)
            nindex, self->cdata->nbytes() / sizeof(scalar_t)));
      return nullptr;
    }
-    scalar_t value = THWStorage_(get)(LIBRARY_STATE self->cdata, nindex);
+    scalar_t value = storage_get(at::unsafeStorageFromTH(self->cdata, /*retain=*/true), nindex);
    return THPUtils_(newReal)(value);
  /* Slice index */
  } else if (PySlice_Check(index)) {
@ -205,7 +210,10 @@ static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
  scalar_t rvalue = THPUtils_(unpackReal)(value);
  if (THPUtils_checkLong(index)) {
    int64_t nindex = THPUtils_unpackLong(index);
-    THWStorage_(set)(LIBRARY_STATE self->cdata, nindex, rvalue);
+    storage_set(
+      at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
+      nindex,
+      rvalue);
    return 0;
  } else if (PySlice_Check(index)) {
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
@ -221,7 +229,10 @@ static int THPStorage_(set)(THPStorage *self, PyObject *index, PyObject *value)
    // TODO: check the bounds only once
    // TODO: fill?
    for (;start < stop; start++)
-      THWStorage_(set)(LIBRARY_STATE self->cdata, start, rvalue);
+      storage_set(
+        at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
+        start,
+        rvalue);
    return 0;
  }
  THPUtils_setError("can't index a " THPStorageStr " with %s",
--- a/torch/csrc/generic/Storage.h
+++ b/torch/csrc/generic/Storage.h
@ -4,7 +4,7 @@

 #include <torch/csrc/StorageDefs.h>

-THP_API PyObject * THPStorage_(New)(THWStorage *ptr);
+THP_API PyObject * THPStorage_(New)(c10::StorageImpl *ptr);
 extern PyObject *THPStorageClass;

 #include <torch/csrc/Types.h>
--- a/torch/csrc/generic/StorageMethods.cpp
+++ b/torch/csrc/generic/StorageMethods.cpp
@ -8,6 +8,13 @@
 #include <cuda_runtime.h>
 #endif

+#if !defined(THC_GENERIC_FILE)
+#include <c10/core/CPUAllocator.h>
+#include <ATen/native/Resize.h>
+#else
+#include <ATen/native/cuda/Resize.h>
+#endif
+
 #ifdef _MSC_VER
 #define LSEEK _lseeki64
 #else
@ -79,9 +86,18 @@ static PyObject * THPStorage_(new)(PyObject *_self, PyObject *noargs)
 {
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
-  THWStoragePtr new_storage(THWStorage_(new)(LIBRARY_STATE_NOARGS));
+  auto new_storage = c10::make_intrusive<at::StorageImpl>(
+    c10::StorageImpl::use_byte_size_t(),
+    0,
+#if defined(THC_GENERIC_FILE)
+    c10::cuda::CUDACachingAllocator::get(),
+#else
+    c10::GetDefaultCPUAllocator(),
+#endif
+    /*resizable=*/true);
+
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
-  PyObject *_ret = THPStorage_(New)(new_storage);
+  PyObject *_ret = THPStorage_(New)(new_storage.get());
  new_storage.release();
  return _ret;
  END_HANDLE_TH_ERRORS
@ -94,8 +110,16 @@ static PyObject * THPStorage_(resize_)(PyObject *_self, PyObject *number_arg)
  THPUtils_assert(THPUtils_checkLong(number_arg), "resize_ expects an int, "
      "but got %s", THPUtils_typename(number_arg));
  int64_t newsize = THPUtils_unpackLong(number_arg);
-  THWStorage_(resizeBytes)(
-      LIBRARY_STATE self->cdata, newsize * sizeof(scalar_t));
+#if defined(THC_GENERIC_FILE)
+  ptrdiff_t size_bytes_i = newsize;
+  TORCH_CHECK(!c10::overflows<size_t>(size_bytes_i),
+              "Requested storage size (", size_bytes_i,
+              ") cannot be represented as a size_t");
+  const auto size_bytes = static_cast<size_t>(size_bytes_i);
+  at::native::resize_bytes_cuda(self->cdata, size_bytes);
+#else
+  at::native::resize_bytes_cpu(self->cdata, newsize);
+#endif
  Py_INCREF(self);
  return (PyObject*)self;
  END_HANDLE_TH_ERRORS
@ -108,7 +132,9 @@ static PyObject * THPStorage_(fill_)(PyObject *_self, PyObject *number_arg)
  THPUtils_assert(THPUtils_(checkReal)(number_arg), "fill_ expects %s, "
      "but got %s", THPUtils_typeTraits<scalar_t>::python_type_str,
      THPUtils_typename(number_arg));
-  THWStorage_(fill)(LIBRARY_STATE self->cdata, THPUtils_(unpackReal)(number_arg));
+  storage_fill(
+    at::unsafeStorageFromTH(self->cdata, /*retain=*/true),
+    THPUtils_(unpackReal)(number_arg));
  Py_INCREF(self);
  return (PyObject*)self;
  END_HANDLE_TH_ERRORS
@ -197,12 +223,21 @@ static PyObject * THPStorage_(fromBuffer)(PyObject *_unused, PyObject *args, PyO
  }

  uint8_t* src = (uint8_t*) buffer.buf;
-  THWStorage* storage = THWStorage_(newWithSize)(size_bytes);
+  c10::StorageImpl* storage = c10::make_intrusive<at::StorageImpl>(
+    c10::StorageImpl::use_byte_size_t(),
+    size_bytes,
+#if defined(THC_GENERIC_FILE)
+    c10::cuda::CUDACachingAllocator::get(),
+#else
+    c10::GetDefaultCPUAllocator(),
+#endif
+    /*resizable=*/true)
+    .release();

  if (scalar_type == at::kByte || scalar_type == at::kChar) {
    memcpy(storage->data(), src + offset, count);
  } else if (scalar_type == at::kBool) {
-    // Because of ASAN checks, that are failing in the THStorage.cpp whenever
+    // Because of ASAN checks, that are failing whenever
    // we are trying to get a value which is not 0 or 1, we have to manually
    // convert original values to boolean ones.
    torch::utils::THP_decodeBoolBuffer(
@ -269,7 +304,27 @@ static PyObject * THPStorage_(fromFile)(PyObject *_unused, PyObject *args, PyObj
  }
  if (shared)
    shared = at::ALLOCATOR_MAPPED_SHARED;
-  THWStorage *storage = THWStorage_(newWithMapping)(LIBRARY_STATE filename, nbytes, shared);
+  c10::StorageImpl* storage;
+
+#ifdef THC_GENERIC_FILE
+  THError("not available yet for CUDA");
+  storage = NULL;
+#else
+  size_t actual_nbytes = -1;
+  storage = c10::make_intrusive<at::StorageImpl>(
+    c10::StorageImpl::use_byte_size_t(),
+    nbytes,
+    at::MapAllocator::makeDataPtr(
+      filename, shared, nbytes, &actual_nbytes),
+    /*allocator=*/nullptr,
+    /*resizable=*/false)
+    .release();
+
+  if (nbytes <= 0) {
+    storage->set_nbytes(actual_nbytes);
+  }
+#endif
+
  return (PyObject*)THPStorage_(New)(storage);
  END_HANDLE_TH_ERRORS
 }
@ -314,7 +369,7 @@ PyObject * THPStorage_(newWithFile)(PyObject *_unused, PyObject *args)
                  "_new_with_file: need to specify element size");
  uint64_t element_size = THPUtils_unpackUInt64(element_size_obj);

-  THWStorage *storage = THPStorage_(readFileRaw<int>)(fd, nullptr, element_size);
+  c10::StorageImpl *storage = THPStorage_(readFileRaw<int>)(fd, nullptr, element_size);
  if (storage == nullptr)
    return nullptr;
  PyObject *result = THPStorage_(New)(storage);
@ -341,7 +396,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
    // but it is currently unnecessary to support this.
    THPUtils_assert(offset == Py_None,
                    "_set_from_file: offset is NYI for filelike objects");
-    THWStorage *storage = THPStorage_(readFileRaw<PyObject*>)(file, self->cdata, element_size);
+    c10::StorageImpl *storage = THPStorage_(readFileRaw<PyObject*>)(file, self->cdata, element_size);
    if (storage == nullptr) {
      return nullptr;
    }
@ -357,7 +412,7 @@ static PyObject *THPStorage_(setFromFile)(PyObject *_self, PyObject *args)
  }
  THPUtils_assert(fd != -1, "_set_from_file couldn't retrieve a file "
      "descriptor from given object");
-  THWStorage *storage = THPStorage_(readFileRaw<int>)(fd, self->cdata, element_size);
+  c10::StorageImpl *storage = THPStorage_(readFileRaw<int>)(fd, self->cdata, element_size);
  if (storage == nullptr)
    return nullptr;
  Py_INCREF(self);
@ -382,7 +437,7 @@ PyObject * THPStorage_(getDevice)(PyObject *_self, PyObject *noargs)
 {
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
-  return THPUtils_packInt32(THCStorage_(getDevice)(LIBRARY_STATE self->cdata));
+  return THPUtils_packInt32(self->cdata->device().index());
  END_HANDLE_TH_ERRORS
 }
 #endif
@ -394,9 +449,13 @@ PyObject * THPStorage_(_setCdata)(PyObject *_self, PyObject *new_cdata)
  THPUtils_assert(THPUtils_checkLong(new_cdata), "given an invalid argument to "
      "_set_cdata - expected an int or long, but got %s",
      THPUtils_typename(new_cdata));
-  THWStorage *ptr = (THWStorage*)PyLong_AsVoidPtr(new_cdata);
-  THWStorage_(retain)(LIBRARY_STATE ptr);
-  THWStorage_(free)(LIBRARY_STATE self->cdata);
+  c10::StorageImpl *ptr = (c10::StorageImpl*)PyLong_AsVoidPtr(new_cdata);
+  if (ptr) {
+    c10::raw::intrusive_ptr::incref(ptr);
+  }
+  if (self->cdata) {
+    c10::raw::intrusive_ptr::decref(self->cdata);
+  }
  self->cdata = ptr;
  Py_INCREF(self);
  return (PyObject*)self;
--- a/torch/csrc/generic/StorageSharing.cpp
+++ b/torch/csrc/generic/StorageSharing.cpp
@ -14,7 +14,7 @@ static PyObject * THPStorage_(sharedDecref)(PyObject *_self, PyObject *noargs)
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
 #ifndef THC_GENERIC_FILE
-  THWStorage *storage = self->cdata;
+  c10::StorageImpl *storage = self->cdata;
  THManagedMapAllocator *ctx = THManagedMapAllocator::fromDataPtr(storage->data_ptr());
  if (ctx) {
    ctx->decref();
@ -30,7 +30,7 @@ static PyObject * THPStorage_(sharedIncref)(PyObject *_self, PyObject *noargs)
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
 #ifndef THC_GENERIC_FILE
-  THWStorage *storage = self->cdata;
+  c10::StorageImpl *storage = self->cdata;
  THManagedMapAllocator *ctx = THManagedMapAllocator::fromDataPtr(storage->data_ptr());
  if (ctx) {
    ctx->incref();
@ -42,14 +42,6 @@ static PyObject * THPStorage_(sharedIncref)(PyObject *_self, PyObject *noargs)

 #ifndef THC_GENERIC_FILE

-static THWStorage* THPStorage_(newFilenameStorage)(ptrdiff_t size)
-{
-  int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
-  std::string handle = at::NewProcessWideShmHandle();
-  return THWStorage_(newWithDataAndAllocator)(
-      THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, size * sizeof(scalar_t)), size, /* allocator */ nullptr);
-}
-
 static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject *args)
 {
  HANDLE_TH_ERRORS
@ -58,7 +50,16 @@ static PyObject * THPStorage_(pyNewFilenameStorage)(PyObject *_unused, PyObject
  if (!PyArg_ParseTuple(args, "L", &size)) {
    return nullptr;
  }
-  return THPStorage_(New)(THPStorage_(newFilenameStorage)(size));
+
+  int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
+  std::string handle = at::NewProcessWideShmHandle();
+  return THPStorage_(New)(c10::make_intrusive<at::StorageImpl>(
+    c10::StorageImpl::use_byte_size_t(),
+    size,
+    THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, size),
+    /*allocator=*/nullptr,
+    /*resizable=*/false)
+    .release());
  END_HANDLE_TH_ERRORS
 }

@ -66,7 +67,7 @@ static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
 {
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
-  THWStorage *storage = self->cdata;
+  c10::StorageImpl *storage = self->cdata;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  THManagedMapAllocator *ctx;
  // Storage is already in shared memory, just return a handle
@ -75,14 +76,21 @@ static PyObject * THPStorage_(shareFilename)(PyObject *_self, PyObject *noargs)
  } else {
    // TODO: retry on collision
    // TODO: free GIL - but remember to reacquire it when an exception is thrown
-    THWStoragePtr new_storage(
-        THPStorage_(newFilenameStorage)(storage->nbytes() / sizeof(scalar_t)));
+    int flags = at::ALLOCATOR_MAPPED_SHAREDMEM | at::ALLOCATOR_MAPPED_EXCLUSIVE;
+    std::string handle = at::NewProcessWideShmHandle();
+    c10::StorageImpl* new_storage = c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      storage->nbytes(),
+      THManagedMapAllocator::makeDataPtr("", handle.c_str(), flags, storage->nbytes()),
+      /*allocator=*/nullptr,
+      /*resizable=*/false)
+      .release();

-    at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage.get(), /*retain=*/true);
+    at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage, /*retain=*/true);
    at::Storage _self_aten = torch::createStorage(_self);
    storage_copy(new_storage_aten, _self_aten);

-    THWStorage_(swap)(storage, new_storage);
+    std::swap(*storage, *new_storage);
    ctx = THManagedMapAllocator::fromDataPtr(storage->data_ptr());
    AT_ASSERT(ctx);
  }
@ -121,14 +129,17 @@ static PyObject * THPStorage_(newSharedFilename)(PyObject *_unused, PyObject *ar
  int flags = at::ALLOCATOR_MAPPED_SHAREDMEM |
              at::ALLOCATOR_MAPPED_NOCREATE;
  return THPStorage_(New)(
-          THWStorage_(newWithDataAndAllocator)(
-            THManagedMapAllocator::makeDataPtr(manager_handle, object_handle, flags, size * sizeof(scalar_t)),
+    c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
      size,
-            /* allocator */ nullptr));
+      THManagedMapAllocator::makeDataPtr(manager_handle, object_handle, flags, size),
+      /*allocator=*/nullptr,
+      /*resizable=*/false)
+      .release());
  END_HANDLE_TH_ERRORS
 }

-static THWStorage* THPStorage_(newFdStorage)(ptrdiff_t size)
+static c10::StorageImpl* THPStorage_(newFdStorage)(ptrdiff_t size)
 {
  int flags = at::ALLOCATOR_MAPPED_SHAREDMEM |
              at::ALLOCATOR_MAPPED_EXCLUSIVE |
@ -136,7 +147,13 @@ static THWStorage* THPStorage_(newFdStorage)(ptrdiff_t size)
              at::ALLOCATOR_MAPPED_UNLINK;
  std::string handle = at::NewProcessWideShmHandle();
  auto sptr = at::MapAllocator::makeDataPtr(handle.c_str(), flags, size * sizeof(scalar_t), nullptr);
-  return THWStorage_(newWithDataAndAllocator)(std::move(sptr), size, /* allocator */ nullptr);
+  return c10::make_intrusive<at::StorageImpl>(
+    c10::StorageImpl::use_byte_size_t(),
+    size,
+    std::move(sptr),
+    /*allocator=*/nullptr,
+    /*resizable=*/false)
+    .release();
 }

 static PyObject * THPStorage_(pyNewFdStorage)(PyObject *_unused, PyObject *args)
@ -155,21 +172,19 @@ static PyObject * THPStorage_(shareFd)(PyObject *_self, PyObject *noargs)
 {
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
-  THWStorage *storage = self->cdata;
+  c10::StorageImpl *storage = self->cdata;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  at::MapAllocator *ctx;
  // Storage is already in shared memory, just return a handle
  if ((ctx = at::MapAllocator::fromDataPtr(storage->data_ptr()))) {
    // done
  } else {
-    THWStoragePtr new_storage(
-        THPStorage_(newFdStorage)(storage->nbytes() / sizeof(scalar_t)));
-
-    at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage.get(), /*retain=*/true);
+    c10::StorageImpl* new_storage = THPStorage_(newFdStorage)(storage->nbytes());
+    at::Storage new_storage_aten = at::unsafeStorageFromTH(new_storage, /*retain=*/true);
    at::Storage _self_aten = torch::createStorage(_self);
    storage_copy(new_storage_aten, _self_aten);

-    THWStorage_(swap)(storage, new_storage);
+    std::swap(*storage, *new_storage);
    ctx = at::MapAllocator::fromDataPtr(storage->data_ptr());
    AT_ASSERT(ctx);
  }
@ -212,10 +227,13 @@ static PyObject * THPStorage_(newSharedFd)(PyObject *_unused, PyObject *args)
              at::ALLOCATOR_MAPPED_KEEPFD |
              at::ALLOCATOR_MAPPED_FROMFD;
  return THPStorage_(New)(
-          THWStorage_(newWithDataAndAllocator)(
-            // TODO: Maybe we should read out the scalar_t size and use it for size
-            at::MapAllocator::makeDataPtr(at::WITH_FD, "", fd, flags, size * sizeof(scalar_t), nullptr),
-            size, /* allocator */ nullptr));
+    c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      size,
+      at::MapAllocator::makeDataPtr(at::WITH_FD, "", fd, flags, size, nullptr),
+      /*allocator=*/nullptr,
+      /*resizable=*/false)
+      .release());
  END_HANDLE_TH_ERRORS
 }

@ -225,7 +243,7 @@ static PyObject * THPStorage_(shareCuda)(PyObject *_self, PyObject *noargs)
 {
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
-  THWStorage *storage = self->cdata;
+  c10::StorageImpl *storage = self->cdata;

  if (storage->received_cuda()) {
    AT_ERROR(
@ -471,10 +489,13 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
      },
      at::Device(at::DeviceType::CUDA, cur_device));

-  THWStoragePtr base(THWStorage_(newWithDataAndAllocator)(
-      LIBRARY_STATE
+  auto base = c10::make_intrusive<at::StorageImpl>(
+    c10::StorageImpl::use_byte_size_t(),
+    storage_size,
    std::move(data_ptr),
-      storage_size, /* allocator */ nullptr));
+    /*allocator=*/nullptr,
+    /*resizable=*/false);
+
  base->set_resizable(false);
  base->set_received_cuda(true);

@ -483,15 +504,15 @@ static PyObject * THPStorage_(newSharedCuda)(PyObject *_unused, PyObject *args)
 }
 #endif

-// Returns an object that holds a "weak" pointer to the THStorage.  This
-// pointer keeps the THStorage struct live, but does not retain the data
+// Returns an object that holds a "weak" pointer to the c10::StorageImpl.  This
+// pointer keeps the c10::StorageImpl struct live, but does not retain the data
 // pointer.
 //
 // NB: This does NOT preserve object identity when you call it multiple times
 static PyObject * THPStorage_(weakRef)(PyObject *_self, PyObject *args) {
  HANDLE_TH_ERRORS
  auto self = (THPStorage*)_self;
-  THStorage* storage = self->cdata;
+  c10::StorageImpl* storage = self->cdata;
  return PyLong_FromVoidPtr(c10::raw::intrusive_ptr::make_weak(storage));
  END_HANDLE_TH_ERRORS
 }
@ -501,7 +522,7 @@ PyObject * THPStorage_(newWithWeakPtr)(PyObject *_unused, PyObject *arg)
  HANDLE_TH_ERRORS
  THPUtils_assert(THPUtils_checkLong(arg),
      "_new_with_weak_ptr(): arg must be an 'int'");
-  THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
+  c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
  if (auto* storage = c10::raw::weak_intrusive_ptr::lock(weak_storage)) {
    return THPStorage_(New)(storage);
  }
@ -517,7 +538,7 @@ PyObject * THPStorage_(freeWeakRef)(PyObject *_unused, PyObject *arg)
  }
  THPUtils_assert(THPUtils_checkLong(arg),
      "_free_weak_ref(): arg must be an 'int'");
-  THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
+  c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
  c10::raw::weak_intrusive_ptr::decref(weak_storage);

  Py_RETURN_NONE;
@ -528,7 +549,7 @@ PyObject * THPStorage_(expired)(PyObject *_unused, PyObject *arg)
 {
  HANDLE_TH_ERRORS
  THPUtils_assert(THPUtils_checkLong(arg), "_expired(): arg must be an 'int'");
-  THStorage *weak_storage = (THStorage*)PyLong_AsVoidPtr(arg);
+  c10::StorageImpl *weak_storage = (c10::StorageImpl*)PyLong_AsVoidPtr(arg);
  return PyBool_FromLong(c10::raw::weak_intrusive_ptr::use_count(weak_storage) == 0);
  END_HANDLE_TH_ERRORS
 }
@ -539,7 +560,7 @@ PyObject * THPStorage_(sharedFd)(PyObject *_self, PyObject *noargs)
  auto self = (THPStorage*)_self;
  at::MapAllocator *ctx = nullptr;
 #ifndef THC_GENERIC_FILE
-  THWStorage *storage = self->cdata;
+  c10::StorageImpl *storage = self->cdata;
  ctx = at::MapAllocator::fromDataPtr(storage->data_ptr());
 #endif

--- a/torch/csrc/generic/serialization.cpp
+++ b/torch/csrc/generic/serialization.cpp
@ -4,13 +4,15 @@

 #ifdef THC_GENERIC_FILE
 #include <c10/cuda/CUDAGuard.h>
+#else
+#include <c10/core/CPUAllocator.h>
 #endif

 // save_save is necessary since the old eager format saved storages as
 // [size + data], but the v1.5 eager format removes this since size is saved in
 // the filesize.
 template <class io>
-void THPStorage_(writeFileRaw)(THWStorage *self, io fd, bool save_size, uint64_t element_size)
+void THPStorage_(writeFileRaw)(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size)
 {
 #ifdef THC_GENERIC_FILE
  c10::cuda::CUDAGuard guard(self->device());
@ -84,11 +86,11 @@ void THPStorage_(writeFileRaw)(THWStorage *self, io fd, bool save_size, uint64_t
  }
 }

-template void THPStorage_(writeFileRaw<int>)(THWStorage *self, int fd, bool save_size, uint64_t element_size);
-template void THPStorage_(writeFileRaw<PyObject*>)(THWStorage *self, PyObject* fd, bool save_size, uint64_t element_size);
+template void THPStorage_(writeFileRaw<int>)(c10::StorageImpl *self, int fd, bool save_size, uint64_t element_size);
+template void THPStorage_(writeFileRaw<PyObject*>)(c10::StorageImpl *self, PyObject* fd, bool save_size, uint64_t element_size);

 template <class io>
-THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t element_size)
+c10::StorageImpl * THPStorage_(readFileRaw)(io file, c10::StorageImpl *_storage, uint64_t element_size)
 {
 #ifdef THC_GENERIC_FILE
  c10::cuda::OptionalCUDAGuard guard;
@ -111,9 +113,17 @@ THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t el
    torch::utils::THP_decodeInt64Buffer(
        &nbytes, (const uint8_t*)&nsize, torch::utils::THP_nativeByteOrder(), 1);
  }
-  THWStoragePtr storage;
+  c10::intrusive_ptr<at::StorageImpl> storage;
  if (_storage == nullptr) {
-    storage = THWStorage_(newWithSize)(LIBRARY_STATE nbytes);
+    storage = c10::make_intrusive<at::StorageImpl>(
+      c10::StorageImpl::use_byte_size_t(),
+      nbytes,
+#if defined(THC_GENERIC_FILE)
+      c10::cuda::CUDACachingAllocator::get(),
+#else
+      c10::GetDefaultCPUAllocator(),
+#endif
+      /*resizable=*/true);
  } else {
    int64_t _storage_nbytes = _storage->nbytes();
    THPUtils_assert(
@ -121,7 +131,7 @@ THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t el
        "storage has wrong byte size: expected %ld got %ld",
        nbytes,
        _storage_nbytes);
-    storage = _storage;
+    storage = c10::intrusive_ptr<at::StorageImpl>::reclaim(_storage);
  }

 #ifndef THC_GENERIC_FILE
@ -176,7 +186,7 @@ THWStorage * THPStorage_(readFileRaw)(io file, THWStorage *_storage, uint64_t el
  return storage.release();
 }

-template THWStorage* THPStorage_(readFileRaw<int>)(int fd, THWStorage* storage, uint64_t element_size);
-template THWStorage* THPStorage_(readFileRaw<PyObject*>)(PyObject* fd, THWStorage* storage, uint64_t element_size);
+template c10::StorageImpl* THPStorage_(readFileRaw<int>)(int fd, c10::StorageImpl* storage, uint64_t element_size);
+template c10::StorageImpl* THPStorage_(readFileRaw<PyObject*>)(PyObject* fd, c10::StorageImpl* storage, uint64_t element_size);

 #endif
--- a/torch/csrc/generic/serialization.h
+++ b/torch/csrc/generic/serialization.h
@ -3,9 +3,9 @@
 #else

 template <class io>
-void THPStorage_(writeFileRaw)(THWStorage *self, io fd, bool save_size, uint64_t element_size);
+void THPStorage_(writeFileRaw)(c10::StorageImpl *self, io fd, bool save_size, uint64_t element_size);

 template <class io>
-THWStorage * THPStorage_(readFileRaw)(io fd, THWStorage *storage, uint64_t element_size);
+c10::StorageImpl * THPStorage_(readFileRaw)(io fd, c10::StorageImpl *storage, uint64_t element_size);

 #endif
--- a/torch/csrc/generic/utils.h
+++ b/torch/csrc/generic/utils.h
@ -11,7 +11,6 @@
 struct THPStorage;
 struct THSPTensor;

-typedef class THPPointer<THWStorage>      THWStoragePtr;
 typedef class THPPointer<THWTensor>       THWTensorPtr;
 typedef class THPPointer<THPStorage>     THPStoragePtr;

--- a/torch/csrc/utils.cpp
+++ b/torch/csrc/utils.cpp
@ -198,6 +198,26 @@ void storage_copy(at::Storage dst, at::Storage src, bool non_blocking) {
  dst_t.copy_(src_t, non_blocking);
 }

+void storage_fill(at::Storage self, uint8_t value) {
+  auto options = c10::TensorOptions().device(self.device()).dtype(at::kByte);
+  auto self_t = at::empty({0}, {}, options).set_(self);
+  self_t.fill_(value);
+}
+
+void storage_set(at::Storage self, ptrdiff_t idx, uint8_t value) {
+  TORCH_CHECK((idx >= 0) && (idx < self.nbytes()), "out of bounds");
+  auto options = c10::TensorOptions().device(self.device()).dtype(at::kByte);
+  auto self_t = at::empty({0}, {}, options).set_(self);
+  self_t[idx].fill_(value);
+}
+
+uint8_t storage_get(at::Storage self, ptrdiff_t idx) {
+  TORCH_CHECK((idx >= 0) && (idx < self.nbytes()), "out of bounds");
+  auto options = c10::TensorOptions().device(self.device()).dtype(at::kByte);
+  auto self_t = at::empty({0}, {}, options).set_(self);
+  return self_t[idx].item<uint8_t>();
+}
+
 template class THPPointer<THPStorage>;

 namespace torch { namespace gdb {
--- a/torch/csrc/utils.h
+++ b/torch/csrc/utils.h
@ -138,7 +138,6 @@ void THPUtils_addPyMethodDefs(std::vector<PyMethodDef>& vector, PyMethodDef* met

 int THPUtils_getCallable(PyObject *arg, PyObject **result);

-#define THWStoragePtr TH_CONCAT_3(TH,Real,StoragePtr)
 #define THWTensorPtr  TH_CONCAT_3(TH,Real,TensorPtr)
 #define THPStoragePtr TH_CONCAT_3(THP,Real,StoragePtr)
 #define THPTensorPtr  TH_CONCAT_3(THP,Real,TensorPtr)
@ -181,5 +180,8 @@ std::vector<c10::optional<at::cuda::CUDAStream>> THPUtils_PySequence_to_CUDAStre
 #endif

 void storage_copy(at::Storage dst, at::Storage src, bool non_blocking=false);
+void storage_fill(at::Storage self, uint8_t value);
+void storage_set(at::Storage self, ptrdiff_t idx, uint8_t value);
+uint8_t storage_get(at::Storage self, ptrdiff_t idx);

 #endif