Update round size with 1 division behavior (#162203)

Have round_size return the nearest power of 2 greater than or equal to the requested size when roundup_power2_divisions is set to 1.

Fixes #161139

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162203
Approved by: https://github.com/ezyang
Author: morrison-turnansky
Date: 2025-10-08 06:41:42 +00:00
Committed by: PyTorch MergeBot
Parent: 65aa62d50d
Commit: 12d2ef557f
2 changed files with 17 additions and 0 deletions


@@ -2502,6 +2502,8 @@ class DeviceCachingAllocator {
    auto divisions = CUDAAllocatorConfig::roundup_power2_divisions(size);
    if (divisions > 1 && size > (kMinBlockSize * divisions)) {
      return roundup_power2_next_division(size, divisions);
    } else if (divisions == 1) {
      return llvm::PowerOf2Ceil(size);
    } else {
      return kMinBlockSize * ((size + kMinBlockSize - 1) / kMinBlockSize);
    }
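For context, llvm::PowerOf2Ceil(size) returns the smallest power of two greater than or equal to size; previously a divisions == 1 setting fell through to the last branch and was rounded up to a multiple of kMinBlockSize (512 bytes) instead. A minimal Python sketch of the new divisions == 1 branch (an illustration with a hypothetical helper name, not the allocator code itself):

def round_size_div1(size: int) -> int:
    # Smallest power of two >= size, mirroring llvm::PowerOf2Ceil for size >= 1.
    return 1 if size <= 1 else 1 << (size - 1).bit_length()

# Example: a 1.5 MiB request rounds up to 2 MiB; an exact power of two is unchanged.
assert round_size_div1(1536 * 1024) == 2 * 1024 * 1024
assert round_size_div1(2 * 1024 * 1024) == 2 * 1024 * 1024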


@@ -4522,6 +4522,21 @@ class TestCudaMallocAsync(TestCase):
        reg_mem = torch.cuda.memory_stats()[key_allocated]
        self.assertEqual(reg_mem - start_mem, nbytes)

        # Test division==1 case.
        torch.cuda.memory.empty_cache()
        div1_start_mem = torch.cuda.memory_stats()[key_allocated]
        div1_start_requested = torch.cuda.memory_stats()[key_requested]
        torch.cuda.memory._set_allocator_settings("roundup_power2_divisions:1")
        torch.rand(nelems, device="cuda")
        div1_end_mem = torch.cuda.memory_stats()[key_allocated]
        div1_end_requested = torch.cuda.memory_stats()[key_requested]
        self.assertEqual(div1_start_mem - start_mem, nbytes)
        if not TEST_CUDAMALLOCASYNC:
            # not supported with the cudaMallocAsync backend
            self.assertEqual(div1_end_mem - div1_start_mem, power2_div(nbytes, 1))
            self.assertEqual(div1_end_requested - div1_start_requested, nbytes)

        with self.assertRaises(RuntimeError):
            torch.cuda.memory._set_allocator_settings("foo:1,bar:2")
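For completeness, a hypothetical end-to-end usage sketch of the setting (assumes the native caching allocator and an available CUDA device; the stat key and sizes below are illustrative and not part of this PR):

import torch

torch.cuda.memory.empty_cache()
torch.cuda.memory._set_allocator_settings("roundup_power2_divisions:1")

before = torch.cuda.memory_stats()["allocated_bytes.all.current"]
x = torch.empty(3 * 1024 * 1024, dtype=torch.uint8, device="cuda")  # 3 MiB request
after = torch.cuda.memory_stats()["allocated_bytes.all.current"]

# With divisions == 1 the 3 MiB request is expected to be served from a 4 MiB block,
# so the allocated-bytes delta should be 4 * 1024 * 1024 while requested bytes stay at 3 MiB.
print(after - before)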