mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
Add API query for available per-process CUDA memory (#140620)
Certain `cpp_wrapper`-enabled tests were OOM-ing in the CI pipeline, with error messages suggesting that sufficient memory was accessible. This ultimately resulted from an internal memory limitation that was not queryable in the API. This PR adds querying for that limit. Additionally, the failing tests had incorrect memory availability checks, and are updated with measured memory requirements. Pull Request resolved: https://github.com/pytorch/pytorch/pull/140620 Approved by: https://github.com/malfet, https://github.com/eqy ghstack dependencies: #141367
This commit is contained in:
committed by
PyTorch MergeBot
parent
5c33c9202f
commit
4959784dac
@ -536,6 +536,20 @@ PyObject* THCPModule_hasPrimaryContext(PyObject* _unused, PyObject* arg) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
// Python binding: returns, as a Python float, the value reported by
// c10::cuda::CUDACachingAllocator::getMemoryFraction for the given device.
//
// `args` must be a tuple holding exactly one object, which is converted to a
// device index via THPUtils_unpackDeviceIndex. If the tuple cannot be parsed,
// THPUtils_invalidArguments is called and nullptr is returned.
PyObject* THCPModule_getMemoryFraction(PyObject* _unused, PyObject* args) {
  HANDLE_TH_ERRORS
  PyObject* device_arg = nullptr;
  const bool parsed = PyArg_ParseTuple(args, "O", &device_arg) != 0;
  if (parsed) {
    const auto device = THPUtils_unpackDeviceIndex(device_arg);
    const auto fraction =
        c10::cuda::CUDACachingAllocator::getMemoryFraction(device);
    return PyFloat_FromDouble(fraction);
  }
  // Parsing failed: report the expected call signature to the caller.
  THPUtils_invalidArguments(
      args, nullptr, "get_memory_fraction", 1, "(int device);");
  return nullptr;
  END_HANDLE_TH_ERRORS
}
|
||||
|
||||
PyObject* THCPModule_setMemoryFraction(PyObject* _unused, PyObject* args) {
|
||||
HANDLE_TH_ERRORS
|
||||
PyObject* fraction_o = nullptr;
|
||||
@ -1872,6 +1886,10 @@ static struct PyMethodDef _THCPModule_methods[] = {
|
||||
METH_NOARGS,
|
||||
nullptr},
|
||||
{"_cuda_hasPrimaryContext", THCPModule_hasPrimaryContext, METH_O, nullptr},
|
||||
{"_cuda_getMemoryFraction",
|
||||
THCPModule_getMemoryFraction,
|
||||
METH_VARARGS,
|
||||
nullptr},
|
||||
{"_cuda_setMemoryFraction",
|
||||
THCPModule_setMemoryFraction,
|
||||
METH_VARARGS,
|
||||
|
Reference in New Issue
Block a user