[Misc] Temporarily resolve the error of BitAndBytes (#7308)
vllm/config.py
@@ -322,8 +322,9 @@ class ModelConfig:
                 "BitAndBytes quantization with TP or PP is not supported yet.")
 
         if self.quantization == "bitsandbytes" and self.enforce_eager is False:
-            raise ValueError(
-                "BitAndBytes with enforce_eager = False is not supported yet.")
+            logger.warning("CUDA graph is not supported on BitAndBytes yet, "
+                           "fallback to the eager mode.")
+            self.enforce_eager = True
 
     def get_hf_config_sliding_window(self) -> Optional[int]:
         """Get the sliding window size, or None if disabled."""
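
For context, a minimal sketch (not part of the commit) of how the new fallback surfaces through vLLM's LLM entrypoint; the model name is illustrative, and load_format="bitsandbytes" is assumed to be required alongside the quantization flag in this version:

from vllm import LLM

# Before #7308 this configuration raised:
#   ValueError: BitAndBytes with enforce_eager = False is not supported yet.
# After #7308, ModelConfig instead logs:
#   "CUDA graph is not supported on BitAndBytes yet, fallback to the eager mode."
# and overrides enforce_eager to True.
llm = LLM(
    model="huggyllama/llama-7b",      # illustrative; any BitAndBytes-loadable model
    quantization="bitsandbytes",
    load_format="bitsandbytes",       # assumption: needed for the BnB load path
    enforce_eager=False,              # no longer an error; coerced to True
)

Downgrading the hard error to a warning plus an automatic enforce_eager = True keeps existing configurations running without a CUDA graph, and the "Temporarily" in the commit title suggests real CUDA-graph support for BitAndBytes is still planned.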