Reuse AcceleratorAllocatorConfig in CUDAAllocatorConfig (#165135)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165135
Approved by: https://github.com/Skylion007
ghstack dependencies: #165129, #165131
This commit is contained in:
Yu, Guangye
2025-10-14 13:29:33 +00:00
committed by PyTorch MergeBot
parent 03e5dbb26e
commit 608a6d4a26
2 changed files with 16 additions and 26 deletions

View File

@@ -32,22 +32,8 @@ CUDAAllocatorConfig::CUDAAllocatorConfig()
}
// Returns the number of power-of-two divisions to use when rounding up an
// allocation of `size` bytes. Delegates to the shared
// AcceleratorAllocatorConfig so every backend uses one divisions table
// (the legacy per-CUDA interval computation was removed in this change).
size_t CUDAAllocatorConfig::roundup_power2_divisions(size_t size) {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      roundup_power2_divisions(size);
}
void CUDAAllocatorConfig::lexArgs(

View File

@@ -17,20 +17,23 @@ enum class Expandable_Segments_Handle_Type : int {
class C10_CUDA_API CUDAAllocatorConfig {
public:
// Maximum block size (bytes) eligible for splitting; forwarded to the
// shared accelerator allocator config, which now owns this setting.
static size_t max_split_size() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::max_split_size();
}
// Fraction of memory at which the caching allocator starts garbage
// collection; forwarded to the shared accelerator allocator config.
static double garbage_collection_threshold() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      garbage_collection_threshold();
}
// True iff expandable segments are requested via the shared accelerator
// allocator config AND this build has CUDA driver API support.
static bool expandable_segments() {
  bool enabled = c10::CachingAllocator::AcceleratorAllocatorConfig::
      use_expandable_segments();
#ifndef PYTORCH_C10_DRIVER_API_SUPPORTED
  // The flag is accepted but inert on unsupported platforms: warn once
  // rather than fail, and report the feature as disabled.
  if (enabled) {
    TORCH_WARN_ONCE("expandable_segments not supported on this platform")
  }
  return false;
#else
  return enabled;
#endif
}
@@ -61,7 +64,8 @@ class C10_CUDA_API CUDAAllocatorConfig {
}
// Whether pinned-memory deallocation runs on background threads;
// forwarded to the shared accelerator allocator config.
static bool pinned_use_background_threads() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      pinned_use_background_threads();
}
static size_t pinned_reserve_segment_size_mb() {
@@ -82,17 +86,17 @@ class C10_CUDA_API CUDAAllocatorConfig {
static size_t roundup_power2_divisions(size_t size);
// Full table of roundup power-of-two divisions, one entry per size
// interval; forwarded to the shared accelerator allocator config.
static std::vector<size_t> roundup_power2_divisions() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      roundup_power2_divisions();
}
// Largest additional size (bytes) a non-split block may be rounded up by;
// forwarded to the shared accelerator allocator config.
static size_t max_non_split_rounding_size() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      max_non_split_rounding_size();
}
// Returns the most recently applied allocator settings string. The state
// (and its synchronization) now lives in the shared accelerator config,
// so the old local mutex + member copy are no longer needed.
static std::string last_allocator_settings() {
  return c10::CachingAllocator::getAllocatorSettings();
}
static CUDAAllocatorConfig& instance() {