[Bugfix] DeepGEMM is not enabled on B200 due to _lazy_init() (#21472)

Signed-off-by: Clayton Coleman <smarterclayton@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
2025-10-20 14:53:52 +08:00 · 2025-07-28 16:51:22 -04:00
parent b18b417fbf
commit c6f36cfa26
1 changed file with 9 additions and 5 deletions
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -13,7 +13,8 @@ from typing import Any, Callable, NoReturn
 import torch

 import vllm.envs as envs
-from vllm.utils import cuda_get_device_properties, has_deep_gemm
+from vllm.platforms import current_platform
+from vllm.utils import has_deep_gemm


@functools.cache
@@ -21,12 +22,15 @@ def is_blackwell_deep_gemm_used() -> bool:
    """Return ``True`` if vLLM is configured to use DeepGEMM on a
    Blackwell-class GPU.
    """
-
-    if not (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm()
-            and _per_block_cast_impl is not None):
+    if not (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm()):
        return False

-    return cuda_get_device_properties(0, ("major", ))[0] == 10
+    _lazy_init()
+    if _per_block_cast_impl is None:
+        return False
+
+    return (current_platform.is_cuda()
+            and current_platform.is_device_capability(100))


 def _missing(*_: Any, **__: Any) -> NoReturn: