[BE] Use TEST_MULTIGPU from common_cuda.py (#103982)

The old comment warned that the `TEST_CUDNN` check would be executed over and over (and could cause CUDA OOM on Windows); that concern has long been alleviated by wrapping the check in `LazyVal`, which caches the result, so the per-file redefinitions of `TEST_CUDA` and `TEST_MULTIGPU` can be dropped in favor of the shared ones. Also delete the unused `TEST_MAGMA`.

Prep change for https://github.com/pytorch/pytorch/issues/100006
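
For context: `LazyVal` defers its callback until the first time the flag is actually used and caches the result, so importing `common_cuda` no longer touches the CUDA context at import time. A minimal sketch of the idea, assuming those caching semantics (not the exact `torch.testing._internal.common_utils` implementation; `probe_cudnn` is a hypothetical stand-in for the real check):

```python
class LazyVal:
    """Defer a boolean check until first use, then cache the result."""

    def __init__(self, fn):
        self._fn = fn       # the expensive check, e.g. a cuDNN probe
        self._value = None  # computed at most once

    def __bool__(self):
        # Runs only when the flag is tested, never at import time.
        if self._value is None:
            self._value = bool(self._fn())
        return self._value


def probe_cudnn():
    # Hypothetical stand-in for the real cuDNN availability check.
    print("probing cuDNN...")
    return True


TEST_CUDNN = LazyVal(probe_cudnn)  # no probe yet
if TEST_CUDNN:                     # first use: probe runs once
    pass
if TEST_CUDNN:                     # second use: cached, no probe
    pass
```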

### <samp>🤖 Generated by Copilot at e3a5b39</samp>

> _`common_cuda.py`_
> _Refactored for dynamo tests_
> _Winter code cleanup_

Pull Request resolved: https://github.com/pytorch/pytorch/pull/103982
Approved by: https://github.com/atalman, https://github.com/janeyx99
Author: Nikita Shulga
Date: 2023-06-22 00:07:44 +00:00
Committed by: PyTorch MergeBot
Parent: eed287ec19
Commit: cd05c3b98c

4 changed files with 9 additions and 28 deletions

View File

@@ -49,13 +49,12 @@ from torch.nn import functional as F
 from torch.testing._internal.common_cuda import (
     PLATFORM_SUPPORTS_FUSED_SDPA,
     SM80OrLater,
+    TEST_CUDA,
+    TEST_MULTIGPU,
 )
 from torch.testing._internal.common_utils import freeze_rng_state, IS_FBCODE
 from torch.testing._internal.jit_utils import JitTestCase
-TEST_CUDA = torch.cuda.is_available()
-TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 mytuple = collections.namedtuple("mytuple", ["a", "b", "ab"])
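
The hunk above shows the pattern the whole change applies: delete the per-file redefinitions and import the shared flags from `common_cuda` instead. A sketch of the consumer side under that pattern (the test class and test body are hypothetical, not part of this PR):

```python
import unittest

import torch
from torch.testing._internal.common_cuda import TEST_MULTIGPU


class ExampleMultiGpuTest(unittest.TestCase):
    # Skips cleanly on single-GPU and CPU-only machines.
    @unittest.skipIf(not TEST_MULTIGPU, "requires at least two GPUs")
    def test_cross_device_copy(self):
        x = torch.ones(4, device="cuda:0")
        self.assertTrue(x.to("cuda:1").eq(1).all().item())


if __name__ == "__main__":
    unittest.main()
```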

View File

@@ -35,6 +35,7 @@ from torch.testing._internal.common_utils import TestCase, freeze_rng_state, run
     NO_MULTIPROCESSING_SPAWN, skipIfRocm, load_tests, IS_REMOTE_GPU, IS_SANDCASTLE, IS_WINDOWS, \
     slowTest, skipCUDANonDefaultStreamIf, skipCUDAMemoryLeakCheckIf, TEST_CUDA, TEST_CUDA_GRAPH, TEST_WITH_ROCM, TEST_NUMPY, \
     get_cycles_per_ms, parametrize, instantiate_parametrized_tests, subtest, IS_JETSON, gcIfJetson, NoTest, IS_LINUX
+from torch.testing._internal.common_cuda import TEST_CUDNN, TEST_MULTIGPU
 from torch.testing._internal.autocast_test_lists import AutocastTestLists
@@ -42,12 +43,6 @@ from torch.testing._internal.autocast_test_lists import AutocastTestLists
 # sharding on sandcastle. This line silences flake warnings
 load_tests = load_tests
-# We cannot import TEST_CUDA and TEST_MULTIGPU from torch.testing._internal.common_cuda here,
-# because if we do that, the TEST_CUDNN line from torch.testing._internal.common_cuda will be executed
-# multiple times as well during the execution of this test suite, and it will
-# cause CUDA OOM error on Windows.
-TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 if not TEST_CUDA:
     print('CUDA not available, skipping tests', file=sys.stderr)
     TestCase = NoTest  # noqa: F811
@@ -64,13 +59,9 @@ skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")
 TEST_CUDAMALLOCASYNC = TEST_CUDA and (torch.cuda.get_allocator_backend() == "cudaMallocAsync")
 TEST_LARGE_TENSOR = TEST_CUDA
 TEST_MEDIUM_TENSOR = TEST_CUDA
-TEST_CUDNN = TEST_CUDA
 TEST_BF16 = False
 TEST_PYNVML = not torch.cuda._HAS_PYNVML
 if TEST_CUDA:
     torch.ones(1).cuda()  # initialize cuda context
-    TEST_CUDNN = TEST_CUDA and (TEST_WITH_ROCM or
-                                torch.backends.cudnn.is_acceptable(torch.tensor(1., device=torch.device('cuda:0'))))
     TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 12e9
     TEST_MEDIUM_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 6e9
     TEST_BF16 = torch.cuda.is_bf16_supported()

View File

@@ -2,18 +2,12 @@
 import torch
 from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocmVersionLessThan, NoTest
+from torch.testing._internal.common_cuda import TEST_CUDA, TEST_MULTIGPU
 import sys
 import unittest
 # NOTE: this needs to be run in a brand new process
-# We cannot import TEST_CUDA and TEST_MULTIGPU from torch.testing._internal.common_cuda here,
-# because if we do that, the TEST_CUDNN line from torch.testing._internal.common_cuda will be executed
-# multiple times as well during the execution of this test suite, and it will
-# cause CUDA OOM error on Windows.
-TEST_CUDA = torch.cuda.is_available()
-TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 if not TEST_CUDA:
     print('CUDA not available, skipping tests', file=sys.stderr)
     TestCase = NoTest  # noqa: F811

View File

@@ -15,7 +15,11 @@ TEST_CUDA = torch.cuda.is_available()
 TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 CUDA_DEVICE = torch.device("cuda:0") if TEST_CUDA else None
 # note: if ROCm is targeted, TEST_CUDNN is code for TEST_MIOPEN
-TEST_CUDNN = LazyVal(lambda: TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.tensor(1., device=CUDA_DEVICE)))
+if TEST_WITH_ROCM:
+    TEST_CUDNN = LazyVal(lambda: TEST_CUDA)
+else:
+    TEST_CUDNN = LazyVal(lambda: TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.tensor(1., device=CUDA_DEVICE)))
 TEST_CUDNN_VERSION = LazyVal(lambda: torch.backends.cudnn.version() if TEST_CUDNN else 0)
 SM53OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (5, 3))
@@ -26,13 +30,6 @@ SM90OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_devic
 PLATFORM_SUPPORTS_FUSED_SDPA: bool = TEST_CUDA and not TEST_WITH_ROCM
-TEST_MAGMA = TEST_CUDA
-if TEST_CUDA:
-    def test_has_magma():
-        torch.ones(1).cuda()  # has_magma shows up after cuda is initialized
-        return torch.cuda.has_magma
-    TEST_MAGMA = LazyVal(test_has_magma)
 if TEST_NUMBA:
     import numba.cuda
     TEST_NUMBA_CUDA = numba.cuda.is_available()