[Misc] Temporarily resolve the error of BitAndBytes (#7308)
vllm/config.py
@@ -322,8 +322,9 @@ class ModelConfig:
                 "BitAndBytes quantization with TP or PP is not supported yet.")
 
         if self.quantization == "bitsandbytes" and self.enforce_eager is False:
-            raise ValueError(
-                "BitAndBytes with enforce_eager = False is not supported yet.")
+            logger.warning("CUDA graph is not supported on BitAndBytes yet, "
+                           "fallback to the eager mode.")
+            self.enforce_eager = True
 
     def get_hf_config_sliding_window(self) -> Optional[int]:
         """Get the sliding window size, or None if disabled."""
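
For context, a minimal sketch (not part of the commit) of how the new fallback surfaces through vLLM's LLM entrypoint; the model name is illustrative, and load_format="bitsandbytes" is assumed to be required alongside the quantization flag in this version:

from vllm import LLM

# Before #7308 this configuration raised:
#   ValueError: BitAndBytes with enforce_eager = False is not supported yet.
# After #7308, ModelConfig instead logs:
#   "CUDA graph is not supported on BitAndBytes yet, fallback to the eager mode."
# and overrides enforce_eager to True.
llm = LLM(
    model="huggyllama/llama-7b",      # illustrative; any BitAndBytes-loadable model
    quantization="bitsandbytes",
    load_format="bitsandbytes",       # assumption: needed for the BnB load path
    enforce_eager=False,              # no longer an error; coerced to True
)

Downgrading the hard error to a warning plus an automatic enforce_eager = True keeps existing configurations running without a CUDA graph, and the "Temporarily" in the commit title suggests real CUDA-graph support for BitAndBytes is still planned.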