[Bugfix] Better error message for MLPSpeculator when num_speculative_tokens is set too high (#5894)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
Thomas Parnell
2024-06-28 16:42:17 +02:00
committed by GitHub
parent 57f09a419c
commit ec1ad0046c

View File

@@ -956,9 +956,9 @@ class SpeculativeConfig:
# Verify provided value doesn't exceed the maximum
# supported by the draft model.
raise ValueError(
- "Expected both speculative_model and "
- "num_speculative_tokens to be provided, but found "
- f"{speculative_model=} and {num_speculative_tokens=}.")
+ "This speculative model supports a maximum of "
+ f"num_speculative_tokens={n_predict}, but "
+ f"{num_speculative_tokens=} was provided.")
draft_model_config.max_model_len = (
SpeculativeConfig._maybe_override_draft_max_model_len(