mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
[Bugfix] Better error message for MLPSpeculator when num_speculative_tokens
is set too high (#5894)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
@@ -956,9 +956,9 @@ class SpeculativeConfig:
|
||||
# Verify provided value doesn't exceed the maximum
|
||||
# supported by the draft model.
|
||||
raise ValueError(
|
||||
- "Expected both speculative_model and "
|
||||
- "num_speculative_tokens to be provided, but found "
|
||||
- f"{speculative_model=} and {num_speculative_tokens=}.")
|
||||
+ "This speculative model supports a maximum of "
|
||||
+ f"num_speculative_tokens={n_predict}, but "
|
||||
+ f"{num_speculative_tokens=} was provided.")
|
||||
|
||||
draft_model_config.max_model_len = (
|
||||
SpeculativeConfig._maybe_override_draft_max_model_len(
|
||||
|
Reference in New Issue
Block a user