Reuse AcceleratorAllocatorConfig in CUDAAllocatorConfig (#165135)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165135
Approved by: https://github.com/Skylion007
ghstack dependencies: #165129, #165131
This commit is contained in:
Yu, Guangye
2025-10-14 13:29:33 +00:00
committed by PyTorch MergeBot
parent 03e5dbb26e
commit 608a6d4a26
2 changed files with 16 additions and 26 deletions

View File

@@ -32,22 +32,8 @@ CUDAAllocatorConfig::CUDAAllocatorConfig()
}
// Returns the number of power-of-two divisions to use when rounding up an
// allocation of `size` bytes. Delegates to the shared
// AcceleratorAllocatorConfig so every backend uses one divisions table
// (the legacy per-CUDA interval computation was removed in this change).
size_t CUDAAllocatorConfig::roundup_power2_divisions(size_t size) {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      roundup_power2_divisions(size);
}
void CUDAAllocatorConfig::lexArgs(

View File

@@ -17,20 +17,23 @@ enum class Expandable_Segments_Handle_Type : int {
class C10_CUDA_API CUDAAllocatorConfig {
public:
// Maximum block size (bytes) eligible for splitting; forwarded to the
// shared accelerator allocator config, which now owns this setting.
static size_t max_split_size() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::max_split_size();
}
// Fraction of memory at which the caching allocator starts garbage
// collection; forwarded to the shared accelerator allocator config.
static double garbage_collection_threshold() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      garbage_collection_threshold();
}
// True iff expandable segments are requested via the shared accelerator
// allocator config AND this build has CUDA driver API support.
static bool expandable_segments() {
  bool enabled = c10::CachingAllocator::AcceleratorAllocatorConfig::
      use_expandable_segments();
#ifndef PYTORCH_C10_DRIVER_API_SUPPORTED
  // The flag is accepted but inert on unsupported platforms: warn once
  // rather than fail, and report the feature as disabled.
  if (enabled) {
    TORCH_WARN_ONCE("expandable_segments not supported on this platform")
  }
  return false;
#else
  return enabled;
#endif
}
@@ -61,7 +64,8 @@ class C10_CUDA_API CUDAAllocatorConfig {
}
// Whether pinned-memory deallocation runs on background threads;
// forwarded to the shared accelerator allocator config.
static bool pinned_use_background_threads() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      pinned_use_background_threads();
}
static size_t pinned_reserve_segment_size_mb() {
@@ -82,17 +86,17 @@ class C10_CUDA_API CUDAAllocatorConfig {
static size_t roundup_power2_divisions(size_t size);
// Full table of roundup power-of-two divisions, one entry per size
// interval; forwarded to the shared accelerator allocator config.
static std::vector<size_t> roundup_power2_divisions() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      roundup_power2_divisions();
}
// Largest additional size (bytes) a non-split block may be rounded up by;
// forwarded to the shared accelerator allocator config.
static size_t max_non_split_rounding_size() {
  return c10::CachingAllocator::AcceleratorAllocatorConfig::
      max_non_split_rounding_size();
}
// Returns the most recently applied allocator settings string. The state
// (and its synchronization) now lives in the shared accelerator config,
// so the old local mutex + member copy are no longer needed.
static std::string last_allocator_settings() {
  return c10::CachingAllocator::getAllocatorSettings();
}
static CUDAAllocatorConfig& instance() {