Move cuda check into utils (#3074)
Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
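In short, reconstructed from the hunks below: the sparse attention tests drop their module-local _skip_on_cuda_compatability() helper and instead import shared skip_on_arch / skip_on_cuda helpers from unit.util, passing the accepted CUDA versions explicitly. The per-test change, quoted from the diff:

    # Before: compatibility check defined and called inside the test module itself
    _skip_on_cuda_compatability()

    # After: shared helpers from unit.util, with the allowed CUDA versions spelled out
    valid_cuda_versions = [101, 102, 110, 111]
    skip_on_arch(min_arch=7)
    skip_on_cuda(valid_cuda=valid_cuda_versions)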
@@ -10,6 +10,7 @@ import torch
 import deepspeed
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.op_builder import SparseAttnBuilder
+from unit.util import skip_on_arch, skip_on_cuda

 if not deepspeed.ops.__compatible_ops__[SparseAttnBuilder.NAME]:
     pytest.skip("sparse attention op is not compatible on this system",
@@ -130,26 +131,14 @@ def init_softmax_inputs(Z, H, M, N, scale, rho, block, dtype, dense_x=True, layo
     return layout, x, dx, bool_attn_mask, fp_attn_mask, kp_mask


-def _skip_on_cuda_compatability():
-    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
-        if torch.cuda.get_device_capability()[0] < 7:
-            pytest.skip("needs higher compute capability than 7")
-        cuda_major = int(torch.version.cuda.split('.')[0]) * 10
-        cuda_minor = int(torch.version.cuda.split('.')[1])
-        cuda_version = cuda_major + cuda_minor
-        if (cuda_version != 101 and cuda_version != 102) and \
-           (cuda_version != 111 and cuda_version != 110):
-            pytest.skip("requires cuda 10.1 or 10.2 or 11.0 or 11.1")
-    else:
-        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
-    return
-
-
 @pytest.mark.parametrize("block", [16, 32])
 @pytest.mark.parametrize("width", [256, 576])
 @pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
 def test_softmax(block, width, dtype):
-    _skip_on_cuda_compatability()
+    valid_cuda_versions = [101, 102, 110, 111]
+    skip_on_arch(min_arch=7)
+    skip_on_cuda(valid_cuda=valid_cuda_versions)
+
     Z = 2
     H = 4
     scale = 0.4
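The integer entries in valid_cuda_versions use the same encoding as the removed helper: a CUDA release is folded into major * 10 + minor, so 101, 102, 110 and 111 stand for CUDA 10.1, 10.2, 11.0 and 11.1. A small self-contained illustration (the encode_cuda_version function is hypothetical, added here only to show the arithmetic):

    def encode_cuda_version(version_str):
        # "10.1" -> 101, "11.0" -> 110, "11.1" -> 111
        major, minor = map(int, version_str.split('.')[:2])
        return major * 10 + minor

    assert [encode_cuda_version(v) for v in ("10.1", "10.2", "11.0", "11.1")] == [101, 102, 110, 111]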
@@ -256,7 +245,10 @@ testdata = [

 @pytest.mark.parametrize("block, dtype, mode, trans_a, trans_b", testdata)
 def test_matmul(block, dtype, mode, trans_a, trans_b):
-    _skip_on_cuda_compatability()
+    valid_cuda_versions = [101, 102, 110, 111]
+    skip_on_arch(min_arch=7)
+    skip_on_cuda(valid_cuda=valid_cuda_versions)
+
     Z = 3
     H = 2
     M = 128
@@ -266,6 +258,7 @@ def test_matmul(block, dtype, mode, trans_a, trans_b):
     x, w, dy, shape, layout = init_matmul_inputs(Z, H, M, N, K, rho, mode, trans_a, trans_b, block, dtype, layout=None)
     ref_y, ref_dx, ref_dw = run_matmul_reference(x.clone(), w.clone(), mode, trans_a, trans_b, layout, block, dy)
     st_y, st_dx, st_dw = run_matmul_sparse(x.clone(), w.clone(), mode, trans_a, trans_b, layout, block, dy)
+
     assert allclose(ref_y, st_y)
     assert allclose(ref_dx, st_dx)
     assert allclose(ref_dw, st_dw)

@@ -15,6 +15,18 @@ def skip_on_arch(min_arch=7):
     return


+def skip_on_cuda(valid_cuda):
+    split_version = lambda x: map(int, x.split('.')[:2])
+    if deepspeed.accelerator.get_accelerator().device_name() == 'cuda':
+        CUDA_MAJOR, CUDA_MINOR = split_version(torch_info['cuda_version'])
+        CUDA_VERSION = (CUDA_MAJOR * 10) + CUDA_MINOR
+        if valid_cuda.count(CUDA_VERSION) == 0:
+            pytest.skip(f"requires cuda versions {valid_cuda}")
+    else:
+        assert deepspeed.accelerator.get_accelerator().device_name() == 'xpu'
+    return
+
+
 def required_torch_version():
     TORCH_MAJOR = int(torch.__version__.split('.')[0])
     TORCH_MINOR = int(torch.__version__.split('.')[1])
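Behavior of the new helper in brief: on a CUDA accelerator it folds torch_info['cuda_version'] into the same major * 10 + minor encoding and calls pytest.skip unless that value appears in valid_cuda; on any other accelerator it asserts the device is 'xpu' and returns, so the test proceeds. A hedged, self-contained sketch of that decision (would_skip is hypothetical and not part of the diff; the real helper depends on pytest and DeepSpeed's build metadata):

    def would_skip(device_name, cuda_version_str, valid_cuda):
        # Mirrors skip_on_cuda's branches without the pytest / deepspeed dependencies.
        if device_name == 'cuda':
            major, minor = map(int, cuda_version_str.split('.')[:2])
            return valid_cuda.count(major * 10 + minor) == 0   # True -> pytest.skip(...)
        assert device_name == 'xpu'   # only other accelerator these tests expect
        return False

    assert would_skip('cuda', '12.0', [101, 102, 110, 111]) is True
    assert would_skip('cuda', '11.1', [101, 102, 110, 111]) is False
    assert would_skip('xpu', '', [101, 102, 110, 111]) is False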