Use is_available instead of device_count to check for CUDA availability (#97043)
Some tests incorrectly use the number of GPU devices (`torch.cuda.device_count() > 0`) to check for CUDA availability instead of the standard `torch.cuda.is_available()` call. This makes them brittle in the face of infra flakiness on G5 runners using A10G, for example [test_pytorch_np](https://hud.pytorch.org/failure/FAILED%20test_tensorboard.py%3A%3ATestTensorBoardPyTorchNumpy%3A%3Atest_pytorch_np%20-%20RuntimeError%3A%20No%20CUDA%20GPUs%20are%20available). The underlying problem is that GPU devices can crash on these runners. While the root cause is unclear and we will try upgrading to a newer NVIDIA driver (https://github.com/pytorch/pytorch/pull/96904) to see if it helps, we can also make these tests more resilient by using the correct check, so they are skipped properly when the GPU crashes.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/97043
Approved by: https://github.com/clee2000
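For illustration, here is a minimal sketch of the pattern this change enforces; the test class and method names below are hypothetical and not part of this PR:

import unittest

import torch


class ExampleCudaTest(unittest.TestCase):
    # Hypothetical test illustrating the availability check.
    def test_tensor_roundtrip(self):
        tensor = torch.rand(2, 3)
        # Preferred: torch.cuda.is_available() returns False when the GPU
        # cannot actually be used, so the CUDA branch is skipped cleanly
        # instead of raising "RuntimeError: No CUDA GPUs are available".
        if torch.cuda.is_available():
            self.assertEqual(tensor.cuda().cpu().shape, tensor.shape)
        # Brittle alternative this PR replaces:
        # if torch.cuda.device_count() > 0: ...


if __name__ == '__main__':
    unittest.main()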
test/test_sparse.py
@@ -2941,7 +2941,7 @@ class TestSparse(TestSparseBase):
             self.skipTest(f'requires_grad==True requires float or complex dtype, got {dtype}')

         self._test_empty_full(device, dtype, requires_grad)
-        if torch.cuda.device_count() > 0:
+        if torch.cuda.is_available():
             self._test_empty_full(None, dtype, requires_grad)
             self._test_empty_full(torch.device('cuda:0'), dtype, requires_grad)

test/test_tensorboard.py
@@ -94,14 +94,14 @@ class TestTensorBoardPyTorchNumpy(BaseTestCase):
         self.assertIsInstance(make_np(tensor), np.ndarray)

         # CUDA tensor
-        if torch.cuda.device_count() > 0:
+        if torch.cuda.is_available():
             self.assertIsInstance(make_np(tensor.cuda()), np.ndarray)

         # regular variable
         self.assertIsInstance(make_np(torch.autograd.Variable(tensor)), np.ndarray)

         # CUDA variable
-        if torch.cuda.device_count() > 0:
+        if torch.cuda.is_available():
            self.assertIsInstance(make_np(torch.autograd.Variable(tensor).cuda()), np.ndarray)

         # python primitive type