[BE] Use TEST_MULTIGPU from common_cuda.py (#103982)

The old comment warned that the `TEST_CUDNN` check would be executed over and over (and could cause CUDA OOM on Windows); that concern has long been alleviated by wrapping the check in `LazyVal`, which caches the result, so the per-file redefinitions of `TEST_CUDA` and `TEST_MULTIGPU` can be dropped in favor of the shared ones. Also delete the unused `TEST_MAGMA`.

Prep change for https://github.com/pytorch/pytorch/issues/100006
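
For context: `LazyVal` defers its callback until the first time the flag is actually used and caches the result, so importing `common_cuda` no longer touches the CUDA context at import time. A minimal sketch of the idea, assuming those caching semantics (not the exact `torch.testing._internal.common_utils` implementation; `probe_cudnn` is a hypothetical stand-in for the real check):

```python
class LazyVal:
    """Defer a boolean check until first use, then cache the result."""

    def __init__(self, fn):
        self._fn = fn       # the expensive check, e.g. a cuDNN probe
        self._value = None  # computed at most once

    def __bool__(self):
        # Runs only when the flag is tested, never at import time.
        if self._value is None:
            self._value = bool(self._fn())
        return self._value


def probe_cudnn():
    # Hypothetical stand-in for the real cuDNN availability check.
    print("probing cuDNN...")
    return True


TEST_CUDNN = LazyVal(probe_cudnn)  # no probe yet
if TEST_CUDNN:                     # first use: probe runs once
    pass
if TEST_CUDNN:                     # second use: cached, no probe
    pass
```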

### <samp>🤖 Generated by Copilot at e3a5b39</samp>

> _`common_cuda.py`_
> _Refactored for dynamo tests_
> _Winter code cleanup_

Pull Request resolved: https://github.com/pytorch/pytorch/pull/103982
Approved by: https://github.com/atalman, https://github.com/janeyx99
Author: Nikita Shulga
Date: 2023-06-22 00:07:44 +00:00
Committed by: PyTorch MergeBot
Parent: eed287ec19
Commit: cd05c3b98c

4 changed files with 9 additions and 28 deletions

View File

@@ -49,13 +49,12 @@ from torch.nn import functional as F
 from torch.testing._internal.common_cuda import (
     PLATFORM_SUPPORTS_FUSED_SDPA,
     SM80OrLater,
+    TEST_CUDA,
+    TEST_MULTIGPU,
 )
 from torch.testing._internal.common_utils import freeze_rng_state, IS_FBCODE
 from torch.testing._internal.jit_utils import JitTestCase
-TEST_CUDA = torch.cuda.is_available()
-TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 mytuple = collections.namedtuple("mytuple", ["a", "b", "ab"])
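
The hunk above shows the pattern the whole change applies: delete the per-file redefinitions and import the shared flags from `common_cuda` instead. A sketch of the consumer side under that pattern (the test class and test body are hypothetical, not part of this PR):

```python
import unittest

import torch
from torch.testing._internal.common_cuda import TEST_MULTIGPU


class ExampleMultiGpuTest(unittest.TestCase):
    # Skips cleanly on single-GPU and CPU-only machines.
    @unittest.skipIf(not TEST_MULTIGPU, "requires at least two GPUs")
    def test_cross_device_copy(self):
        x = torch.ones(4, device="cuda:0")
        self.assertTrue(x.to("cuda:1").eq(1).all().item())


if __name__ == "__main__":
    unittest.main()
```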

View File

@@ -35,6 +35,7 @@ from torch.testing._internal.common_utils import TestCase, freeze_rng_state, run
     NO_MULTIPROCESSING_SPAWN, skipIfRocm, load_tests, IS_REMOTE_GPU, IS_SANDCASTLE, IS_WINDOWS, \
     slowTest, skipCUDANonDefaultStreamIf, skipCUDAMemoryLeakCheckIf, TEST_CUDA, TEST_CUDA_GRAPH, TEST_WITH_ROCM, TEST_NUMPY, \
     get_cycles_per_ms, parametrize, instantiate_parametrized_tests, subtest, IS_JETSON, gcIfJetson, NoTest, IS_LINUX
+from torch.testing._internal.common_cuda import TEST_CUDNN, TEST_MULTIGPU
 from torch.testing._internal.autocast_test_lists import AutocastTestLists
@@ -42,12 +43,6 @@ from torch.testing._internal.autocast_test_lists import AutocastTestLists
 # sharding on sandcastle. This line silences flake warnings
 load_tests = load_tests
-# We cannot import TEST_CUDA and TEST_MULTIGPU from torch.testing._internal.common_cuda here,
-# because if we do that, the TEST_CUDNN line from torch.testing._internal.common_cuda will be executed
-# multiple times as well during the execution of this test suite, and it will
-# cause CUDA OOM error on Windows.
-TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 if not TEST_CUDA:
     print('CUDA not available, skipping tests', file=sys.stderr)
     TestCase = NoTest  # noqa: F811
@@ -64,13 +59,9 @@ skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")
 TEST_CUDAMALLOCASYNC = TEST_CUDA and (torch.cuda.get_allocator_backend() == "cudaMallocAsync")
 TEST_LARGE_TENSOR = TEST_CUDA
 TEST_MEDIUM_TENSOR = TEST_CUDA
-TEST_CUDNN = TEST_CUDA
 TEST_BF16 = False
 TEST_PYNVML = not torch.cuda._HAS_PYNVML
 if TEST_CUDA:
     torch.ones(1).cuda()  # initialize cuda context
-    TEST_CUDNN = TEST_CUDA and (TEST_WITH_ROCM or
-                                torch.backends.cudnn.is_acceptable(torch.tensor(1., device=torch.device('cuda:0'))))
     TEST_LARGE_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 12e9
     TEST_MEDIUM_TENSOR = torch.cuda.get_device_properties(0).total_memory >= 6e9
     TEST_BF16 = torch.cuda.is_bf16_supported()

View File

@@ -2,18 +2,12 @@
 import torch
 from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocmVersionLessThan, NoTest
+from torch.testing._internal.common_cuda import TEST_CUDA, TEST_MULTIGPU
 import sys
 import unittest
 # NOTE: this needs to be run in a brand new process
-# We cannot import TEST_CUDA and TEST_MULTIGPU from torch.testing._internal.common_cuda here,
-# because if we do that, the TEST_CUDNN line from torch.testing._internal.common_cuda will be executed
-# multiple times as well during the execution of this test suite, and it will
-# cause CUDA OOM error on Windows.
-TEST_CUDA = torch.cuda.is_available()
-TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 if not TEST_CUDA:
     print('CUDA not available, skipping tests', file=sys.stderr)
     TestCase = NoTest  # noqa: F811

View File

@@ -15,7 +15,11 @@ TEST_CUDA = torch.cuda.is_available()
 TEST_MULTIGPU = TEST_CUDA and torch.cuda.device_count() >= 2
 CUDA_DEVICE = torch.device("cuda:0") if TEST_CUDA else None
 # note: if ROCm is targeted, TEST_CUDNN is code for TEST_MIOPEN
-TEST_CUDNN = LazyVal(lambda: TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.tensor(1., device=CUDA_DEVICE)))
+if TEST_WITH_ROCM:
+    TEST_CUDNN = LazyVal(lambda: TEST_CUDA)
+else:
+    TEST_CUDNN = LazyVal(lambda: TEST_CUDA and torch.backends.cudnn.is_acceptable(torch.tensor(1., device=CUDA_DEVICE)))
 TEST_CUDNN_VERSION = LazyVal(lambda: torch.backends.cudnn.version() if TEST_CUDNN else 0)
 SM53OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() >= (5, 3))
@@ -26,13 +30,6 @@ SM90OrLater = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_devic
 PLATFORM_SUPPORTS_FUSED_SDPA: bool = TEST_CUDA and not TEST_WITH_ROCM
-TEST_MAGMA = TEST_CUDA
-if TEST_CUDA:
-    def test_has_magma():
-        torch.ones(1).cuda()  # has_magma shows up after cuda is initialized
-        return torch.cuda.has_magma
-    TEST_MAGMA = LazyVal(test_has_magma)
 if TEST_NUMBA:
     import numba.cuda
     TEST_NUMBA_CUDA = numba.cuda.is_available()