diff --git a/test/test_cuda.py b/test/test_cuda.py
index 8cf7d5c87986..ac51c2daec05 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -4583,31 +4583,33 @@ class TestCudaMallocAsync(TestCase):
         self.assertEqual(reg_mem - start_mem, nbytes)

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings("foo:1,bar:2")
+            torch._C._accelerator_setAllocatorSettings("foo:1,bar:2")

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings(
+            torch._C._accelerator_setAllocatorSettings(
                 "garbage_collection_threshold:1.2"
             )

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings("max_split_size_mb:2")
+            torch._C._accelerator_setAllocatorSettings("max_split_size_mb:2")

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings("release_lock_on_cudamalloc:none")
+            torch._C._accelerator_setAllocatorSettings(
+                "release_lock_on_cudamalloc:none"
+            )

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings(
+            torch._C._accelerator_setAllocatorSettings(
                 "pinned_use_cuda_host_register:none"
             )

-        with self.assertRaises(RuntimeError):
-            torch.cuda.memory._set_allocator_settings(
+        with self.assertRaises(ValueError):
+            torch._C._accelerator_setAllocatorSettings(
                 "pinned_num_register_threads:none"
             )

         with self.assertRaises(ValueError):
-            torch.cuda.memory._set_allocator_settings(
+            torch._C._accelerator_setAllocatorSettings(
                 "pinned_num_register_threads:1024"
             )

diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in
index b99fd3f2b80a..abdc3d58a2b3 100644
--- a/torch/_C/__init__.pyi.in
+++ b/torch/_C/__init__.pyi.in
@@ -2048,7 +2048,6 @@ def _cuda_cudaHostAllocator() -> _int: ...
 def _cuda_cudaCachingAllocator_raw_alloc(size: _int, cuda_stream: _int) -> _int: ...
 def _cuda_cudaCachingAllocator_raw_delete(ptr: _int) -> None: ...
 def _cuda_cudaCachingAllocator_enable(val: _bool) -> None: ...
-def _cuda_cudaCachingAllocator_set_allocator_settings(env: str) -> None: ...
 def _cuda_beginAllocateToPool(device: _int, mempool_id: tuple[_int, _int]) -> None: ...
 def _cuda_beginAllocateCurrentThreadToPool(
     device: _int,
@@ -2477,6 +2476,7 @@ def _accelerator_emptyCache() -> None: ...
 def _accelerator_getDeviceStats(device_index: _int) -> dict[str, Any]: ...
 def _accelerator_resetAccumulatedStats(device_index: _int) -> None: ...
 def _accelerator_resetPeakStats(device_index: _int) -> None: ...
+def _accelerator_setAllocatorSettings(env: str) -> None: ...

 # Defined in torch/csrc/jit/python/python_tracer.cpp
 class TracingState:
diff --git a/torch/_dynamo/trace_rules.py b/torch/_dynamo/trace_rules.py
index e138fc2e9ea4..cf480377057a 100644
--- a/torch/_dynamo/trace_rules.py
+++ b/torch/_dynamo/trace_rules.py
@@ -449,6 +449,7 @@ torch_c_binding_in_graph_functions = dict.fromkeys(
         "torch._C._accelerator_getAccelerator",
         "torch._C._accelerator_getDeviceIndex",
         "torch._C._accelerator_getStream",
+        "torch._C._accelerator_setAllocatorSettings",
         "torch._C._accelerator_setStream",
         "torch._C._accelerator_synchronizeDevice",
         "torch._C._activate_gpu_trace",
@@ -505,7 +506,6 @@ torch_c_binding_in_graph_functions = dict.fromkeys(
         "torch._C._cuda_clearCublasWorkspaces",
         "torch._C._cuda_cudaCachingAllocator_raw_alloc",
         "torch._C._cuda_cudaCachingAllocator_raw_delete",
-        "torch._C._cuda_cudaCachingAllocator_set_allocator_settings",
         "torch._C._cuda_cudaHostAllocator",
         "torch._C._cuda_customAllocator",
         "torch._C._cuda_emptyCache",
diff --git a/torch/csrc/DeviceAccelerator.cpp b/torch/csrc/DeviceAccelerator.cpp
index dc3da8881a71..b6176f11aaf6 100644
--- a/torch/csrc/DeviceAccelerator.cpp
+++ b/torch/csrc/DeviceAccelerator.cpp
@@ -1,3 +1,4 @@
+#include
 #include
 #include

@@ -136,6 +137,10 @@ void initModule(PyObject* module) {
   m.def("_accelerator_resetPeakStats", [](c10::DeviceIndex device_index) {
     at::accelerator::resetPeakStats(device_index);
   });
+
+  m.def("_accelerator_setAllocatorSettings", [](std::string env) {
+    c10::CachingAllocator::setAllocatorSettings(env);
+  });
 }

 } // namespace torch::accelerator
diff --git a/torch/csrc/cuda/Module.cpp b/torch/csrc/cuda/Module.cpp
index 32ade3680980..a9a5a13206f9 100644
--- a/torch/csrc/cuda/Module.cpp
+++ b/torch/csrc/cuda/Module.cpp
@@ -20,8 +20,8 @@
 #include
 #include
 #include
+#include
 #include
-#include
 #include
 #include
 #include
@@ -422,16 +422,6 @@ PyObject* THCPModule_cudaCachingAllocator_enable(
   END_HANDLE_TH_ERRORS
 }

-PyObject* THCPModule_cudaCachingAllocator_set_allocator_settings(
-    PyObject* _unused,
-    PyObject* env) {
-  HANDLE_TH_ERRORS
-  c10::cuda::CUDACachingAllocator::setAllocatorSettings(
-      THPUtils_unpackString(env));
-  Py_RETURN_NONE;
-  END_HANDLE_TH_ERRORS
-}
-
 PyObject* THCPModule_getAllocatorBackend(PyObject* _unused, PyObject* noargs) {
   HANDLE_TH_ERRORS
   return THPUtils_packString(c10::cuda::CUDACachingAllocator::name());
@@ -2077,10 +2067,6 @@ static struct PyMethodDef _THCPModule_methods[] = {
      THCPModule_cudaCachingAllocator_enable,
      METH_O,
      nullptr},
-    {"_cuda_cudaCachingAllocator_set_allocator_settings",
-     THCPModule_cudaCachingAllocator_set_allocator_settings,
-     METH_O,
-     nullptr},
     {"_cuda_getAllocatorBackend",
      THCPModule_getAllocatorBackend,
      METH_NOARGS,
diff --git a/torch/cuda/memory.py b/torch/cuda/memory.py
index b38cd2fa59c7..5844f53da841 100644
--- a/torch/cuda/memory.py
+++ b/torch/cuda/memory.py
@@ -1101,8 +1101,12 @@ def _save_memory_usage(filename="output.svg", snapshot=None):
         f.write(_memory(snapshot))


+@deprecated(
+    "torch.cuda.memory._set_allocator_settings is deprecated. Use torch._C._accelerator_setAllocatorSettings instead.",
+    category=FutureWarning,
+)
 def _set_allocator_settings(env: str):
-    return torch._C._cuda_cudaCachingAllocator_set_allocator_settings(env)
+    return torch._C._accelerator_setAllocatorSettings(env)


 def get_allocator_backend() -> str:
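
A quick way to exercise the new binding and the deprecation shim end to end (a minimal sketch, not part of the patch; it assumes a PyTorch build with this change applied and reuses only settings keys that appear in the test above, and the `_accelerator_*` functions are private APIs that may change without notice):

```python
import warnings

import torch

# New accelerator-generic entry point (private API added by this patch).
torch._C._accelerator_setAllocatorSettings("garbage_collection_threshold:0.8")

# Invalid settings now raise ValueError uniformly; note that
# "pinned_num_register_threads:none" previously raised RuntimeError.
for bad in ("foo:1,bar:2", "pinned_num_register_threads:none"):
    try:
        torch._C._accelerator_setAllocatorSettings(bad)
    except ValueError as e:
        print(f"rejected {bad!r}: {e}")

# The old torch.cuda.memory helper still works, but it now emits a
# FutureWarning before delegating to the accelerator binding.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    torch.cuda.memory._set_allocator_settings("garbage_collection_threshold:0.8")
assert any(issubclass(w.category, FutureWarning) for w in caught)
```

Routing the call through `torch._C._accelerator_setAllocatorSettings` (backed by `c10::CachingAllocator::setAllocatorSettings`) keeps allocator configuration in the device-agnostic accelerator layer rather than in CUDA-specific bindings, which is also why the error type is now a consistent ValueError across all malformed inputs.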