mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Update round_size behavior when roundup_power2_divisions is 1 (#162203)
Have round_size return the nearest power of 2 greater than or equal to the requested size when roundup_power2_divisions is 1. Fixes #161139. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162203 Approved by: https://github.com/ezyang
This commit is contained in:
committed by
PyTorch MergeBot
parent
65aa62d50d
commit
12d2ef557f
@ -2502,6 +2502,8 @@ class DeviceCachingAllocator {
|
||||
auto divisions = CUDAAllocatorConfig::roundup_power2_divisions(size);
|
||||
if (divisions > 1 && size > (kMinBlockSize * divisions)) {
|
||||
return roundup_power2_next_division(size, divisions);
|
||||
} else if (divisions == 1) {
|
||||
return llvm::PowerOf2Ceil(size);
|
||||
} else {
|
||||
return kMinBlockSize * ((size + kMinBlockSize - 1) / kMinBlockSize);
|
||||
}
|
||||
|
@ -4522,6 +4522,21 @@ class TestCudaMallocAsync(TestCase):
|
||||
reg_mem = torch.cuda.memory_stats()[key_allocated]
|
||||
self.assertEqual(reg_mem - start_mem, nbytes)
|
||||
|
||||
# Test division==1 case.
|
||||
torch.cuda.memory.empty_cache()
|
||||
div1_start_mem = torch.cuda.memory_stats()[key_allocated]
|
||||
div1_start_requested = torch.cuda.memory_stats()[key_requested]
|
||||
torch.cuda.memory._set_allocator_settings("roundup_power2_divisions:1")
|
||||
torch.rand(nelems, device="cuda")
|
||||
div1_end_mem = torch.cuda.memory_stats()[key_allocated]
|
||||
div1_end_requested = torch.cuda.memory_stats()[key_requested]
|
||||
|
||||
self.assertEqual(div1_start_mem - start_mem, nbytes)
|
||||
if not TEST_CUDAMALLOCASYNC:
|
||||
# not supported with the cudaMallocAsync backend
|
||||
self.assertEqual(div1_end_mem - div1_start_mem, power2_div(nbytes, 1))
|
||||
self.assertEqual(div1_end_requested - div1_start_requested, nbytes)
|
||||
|
||||
with self.assertRaises(RuntimeError):
|
||||
torch.cuda.memory._set_allocator_settings("foo:1,bar:2")
|
||||
|
||||
|
Reference in New Issue
Block a user