Mirror of https://github.com/vllm-project/vllm.git, synced 2025-10-20 23:03:52 +08:00.
[Misc] Temporarily resolve the error of BitAndBytes (#7308)
This commit is contained in:
@@ -322,8 +322,9 @@ class ModelConfig:
                 "BitAndBytes quantization with TP or PP is not supported yet.")

         if self.quantization == "bitsandbytes" and self.enforce_eager is False:
-            raise ValueError(
-                "BitAndBytes with enforce_eager = False is not supported yet.")
+            logger.warning("CUDA graph is not supported on BitAndBytes yet, "
+                           "fallback to the eager mode.")
+            self.enforce_eager = True

     def get_hf_config_sliding_window(self) -> Optional[int]:
         """Get the sliding window size, or None if disabled."""
|
Reference in New Issue
Block a user