diff --git a/BUILD.bazel b/BUILD.bazel
index d4202e7a2c1e..58ebc31e243c 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -747,7 +747,6 @@ cc_library(
         "torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu",
         "torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu",
         "torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp",
-        "torch/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp",
         "torch/csrc/distributed/c10d/symm_mem/intra_node_comm.cu",
     ],
 )) + torch_sources,
diff --git a/build_variables.bzl b/build_variables.bzl
index 0ab2e1623c32..dfae1d527bb7 100644
--- a/build_variables.bzl
+++ b/build_variables.bzl
@@ -755,7 +755,6 @@ libtorch_cuda_distributed_extra_sources = [
     "torch/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu",
     "torch/csrc/distributed/c10d/symm_mem/intra_node_comm.cpp",
     "torch/csrc/distributed/c10d/symm_mem/intra_node_comm.cu",
-    "torch/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp",
     "torch/csrc/distributed/rpc/tensorpipe_cuda.cpp",
 ]
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index a41c66301527..781e134ad0d3 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -581,7 +581,6 @@ if(USE_CUDA)
       ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu
       ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp
       ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu
-      ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp
       PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1"
     )
 endif()
diff --git a/test/distributed/test_nvshmem.py b/test/distributed/test_nvshmem.py
index f8567cdad077..64b8062b6098 100644
--- a/test/distributed/test_nvshmem.py
+++ b/test/distributed/test_nvshmem.py
@@ -65,58 +65,6 @@ class NVSHMEMSymmetricMemoryTest(MultiProcContinuousTest):
         out = symm_mem.empty(numel, dtype=dtype, device=self.device)
         symm_mem.rendezvous(out, group=group_name)

-    @skipIfRocm
-    def test_mempool_tensor_factory(self) -> None:
-        """
-        Test the effectiveness of MemPool on tensor factory ops.
-        """
-        self._init_device()
-        group_name = dist.group.WORLD.group_name
-        symm_mem.enable_symm_mem_for_group(group_name)
-
-        dtype = torch.float
-        numel = 1024
-        src_rank = 0
-
-        allocator = symm_mem.get_mempool_allocator(self.device)
-        mempool = torch.cuda.MemPool(allocator)
-
-        with torch.cuda.use_mem_pool(mempool):
-            if self.rank == src_rank:
-                tensor = torch.arange(numel, dtype=dtype, device=self.device)
-            else:
-                tensor = torch.zeros(numel, dtype=dtype, device=self.device)
-
-        symm_mem.rendezvous(tensor, group=group_name)
-        torch.ops.symm_mem.nvshmem_broadcast(tensor, group_name)
-        self.assertEqual(tensor, torch.arange(numel, dtype=dtype, device=self.device))
-
-    @skipIfRocm
-    def test_mempool_compute_ops(self) -> None:
-        """
-        Apply MemPool context to a compute op that creates input to collective.
-        """
-        self._init_device()
-        group_name = dist.group.WORLD.group_name
-        symm_mem.enable_symm_mem_for_group(group_name)
-
-        dtype = torch.float
-        dim = 1024
-        w = torch.ones(dim, dim, dtype=dtype, device=self.device)
-        x0 = torch.ones(1, dim, dtype=dtype, device=self.device)
-
-        allocator = symm_mem.get_mempool_allocator(self.device)
-        mempool = torch.cuda.MemPool(allocator)
-
-        with torch.cuda.use_mem_pool(mempool):
-            x = x0 + self.rank
-            y = torch.mm(x, w)
-
-        # y should be a symm tensor
-        torch.ops.symm_mem.nvshmem_broadcast(y, group_name)
-        expected = torch.mm(x0, w)
-        self.assertEqual(y, expected)
-
     @skipIfRocm
     def test_nvshmem_put(self) -> None:
         self._init_device()
diff --git a/torch/_C/_distributed_c10d.pyi b/torch/_C/_distributed_c10d.pyi
index 0622cdf461aa..72fde27d0257 100644
--- a/torch/_C/_distributed_c10d.pyi
+++ b/torch/_C/_distributed_c10d.pyi
@@ -769,8 +769,6 @@ class _SymmetricMemory:
     def set_backend(name: str) -> None: ...
     @staticmethod
     def get_backend(device: torch.device) -> Optional[str]: ...
-    @staticmethod
-    def get_mempool_allocator(device: torch.device) -> Any: ...
     @property
     def rank(self) -> int: ...
     @property
diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp
index fd612d46abad..a0904a814637 100644
--- a/torch/csrc/distributed/c10d/init.cpp
+++ b/torch/csrc/distributed/c10d/init.cpp
@@ -1128,9 +1128,6 @@ This class does not support ``__members__`` property.)");
           &::c10d::symmetric_memory::has_multicast_support)
       .def_static("set_backend", &::c10d::symmetric_memory::set_backend)
       .def_static("get_backend", &::c10d::symmetric_memory::get_backend)
-      .def_static(
-          "get_mempool_allocator",
-          &::c10d::symmetric_memory::get_mempool_allocator)
       .def_property_readonly("rank", &SymmetricMemory::get_rank)
       .def_property_readonly("world_size", &SymmetricMemory::get_world_size)
       .def_property_readonly(
diff --git a/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.cpp b/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.cpp
index 254a354285f8..2831a4416de9 100644
--- a/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.cpp
+++ b/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.cpp
@@ -266,28 +266,6 @@ TORCH_API bool has_multicast_support(
     return allocator->has_multicast_support(device_idx);
   }
 }
-
-static std::unordered_map<c10::DeviceType, std::shared_ptr<c10::Allocator>>
-    _mempool_allocators;
-
-void register_mempool_allocator(
-    c10::DeviceType device_type,
-    std::shared_ptr<c10::Allocator> allocator) {
-  _mempool_allocators[device_type] = std::move(allocator);
-}
-
-// Get allocator for MemPool given device
-std::shared_ptr<c10::Allocator> get_mempool_allocator(c10::Device device) {
-  auto it = _mempool_allocators.find(device.type());
-  if (it == _mempool_allocators.end()) {
-    TORCH_CHECK(
-        false,
-        "SymmetricMemory MemPool did not find backend for device type ",
-        device.type());
-  }
-  return it->second;
-}
-
 } // namespace c10d::symmetric_memory

 namespace {
diff --git a/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp b/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp
index 82586239a231..c2828de04c9b 100644
--- a/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp
+++ b/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp
@@ -184,11 +184,4 @@ TORCH_API void set_backend(const std::string& name);

 TORCH_API std::optional<std::string> get_backend(c10::Device device);

-C10_EXPORT void register_mempool_allocator(
-    c10::DeviceType device_type,
-    std::shared_ptr<c10::Allocator> allocator);
-
-TORCH_API std::shared_ptr<c10::Allocator> get_mempool_allocator(
-    c10::Device device);
-
 } // namespace c10d::symmetric_memory
diff --git a/torch/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp b/torch/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp
deleted file mode 100644
index bfbe02bd6f86..000000000000
--- a/torch/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <torch/csrc/cuda/CUDAPluggableAllocator.h>
-#include <torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp>
-
-namespace {
-using namespace c10d::symmetric_memory;
-
-// Alloc functor for MemPool
-void* cuda_symm_alloc(size_t size, int device, void* stream) {
-  static auto allocator = get_allocator(c10::DeviceType::CUDA);
-  TORCH_CHECK(
-      allocator->name() == "NVSHMEM", "Only NVSHMEM backend is supported");
-  // Note: this alloc functor works for the NVSHMEM and NCCL backends only,
-  // because only these backends takes `nullopt` for the `group` argument which
-  // is not given by MemPool's invocation (actually these two backends requires
-  // it to be `nullopt`).
-  return allocator->alloc(size, device, /*group_name=*/std::nullopt);
-}
-
-// Free functor for MemPool
-void cuda_symm_free(void* ptr, size_t size, int device, void* stream) {
-  static auto allocator = get_allocator(c10::DeviceType::CUDA);
-  TORCH_CHECK(
-      allocator->name() == "NVSHMEM", "Only NVSHMEM backend is supported");
-  allocator->free(ptr);
-}
-
-// Register allocator for CUDA MemPool
-struct RegisterCUDAMemPoolAllocator {
-  RegisterCUDAMemPoolAllocator() {
-    std::shared_ptr<c10::cuda::CUDACachingAllocator::CUDAAllocator> allocator =
-        torch::cuda::CUDAPluggableAllocator::createCustomAllocator(
-            cuda_symm_alloc, cuda_symm_free);
-    register_mempool_allocator(c10::DeviceType::CUDA, allocator);
-  }
-};
-
-static RegisterCUDAMemPoolAllocator register_cuda_mempool_allocator_;
-
-} // namespace
diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py
index 7b09d8780eb5..4b0e9acc19bd 100644
--- a/torch/distributed/_symmetric_memory/__init__.py
+++ b/torch/distributed/_symmetric_memory/__init__.py
@@ -1781,14 +1781,4 @@ def get_backend(device: _device) -> Optional[str]:
     return _SymmetricMemory.get_backend(torch.device(device))


-def get_mempool_allocator(device: _device):  # type: ignore[no-untyped-def]
-    r"""
-    Get the MemPool allocator for symmetric memory for a given device.
-    Args:
-        device (class:`torch.device` or str): the device for which to get the
-            MemPool allocator.
-    """
-    return _SymmetricMemory.get_mempool_allocator(torch.device(device))
-
-
 __all__ = ["empty", "rendezvous", "is_nvshmem_available", "set_backend", "get_backend"]
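For context, a minimal sketch of the usage pattern this diff removes, distilled from the deleted test_mempool_tensor_factory. It assumes the now-removed symm_mem.get_mempool_allocator API, an available NVSHMEM symmetric-memory backend, and an already-initialized default process group; it is an illustration of the reverted workflow, not current PyTorch API.

    # Sketch of the removed MemPool-based symmetric-memory allocation flow.
    import torch
    import torch.distributed as dist
    import torch.distributed._symmetric_memory as symm_mem

    device = torch.device("cuda", dist.get_rank())
    group_name = dist.group.WORLD.group_name
    symm_mem.enable_symm_mem_for_group(group_name)

    # get_mempool_allocator is the API removed by this diff.
    allocator = symm_mem.get_mempool_allocator(device)
    mempool = torch.cuda.MemPool(allocator)

    # Tensors allocated under the pool were backed by symmetric memory,
    # so they could be rendezvous'ed and passed to symm_mem collectives.
    with torch.cuda.use_mem_pool(mempool):
        tensor = torch.zeros(1024, dtype=torch.float, device=device)

    symm_mem.rendezvous(tensor, group=group_name)
    torch.ops.symm_mem.nvshmem_broadcast(tensor, group_name)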