[Bugfix] Fixing max token error message for openai compatible server (#4016)
```diff
@@ -206,6 +206,12 @@ class OpenAIServing:
         token_num = len(input_ids)
 
         if request.max_tokens is None:
+            if token_num >= self.max_model_len:
+                raise ValueError(
+                    f"This model's maximum context length is "
+                    f"{self.max_model_len} tokens. However, you requested "
+                    f"{token_num} tokens in the messages, "
+                    f"Please reduce the length of the messages.", )
             request.max_tokens = self.max_model_len - token_num
 
         if token_num + request.max_tokens > self.max_model_len:
```
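For context, a minimal standalone sketch of the validation this hunk introduces, using the names that appear in the diff (`token_num`, `max_tokens`, `max_model_len`). The `resolve_max_tokens` helper is hypothetical and for illustration only, not part of vLLM's API:

```python
from typing import Optional


def resolve_max_tokens(token_num: int, max_tokens: Optional[int],
                       max_model_len: int) -> int:
    """Hypothetical helper mirroring the check added in this commit."""
    if max_tokens is None:
        # New in this commit: if the prompt alone already fills (or exceeds)
        # the context window, fail fast with a clear error message instead of
        # deriving a zero or negative completion budget.
        if token_num >= max_model_len:
            raise ValueError(
                f"This model's maximum context length is {max_model_len} "
                f"tokens. However, you requested {token_num} tokens in the "
                f"messages, Please reduce the length of the messages.")
        max_tokens = max_model_len - token_num

    # Pre-existing check: prompt plus requested completion must still fit.
    # (Wording here is illustrative, not vLLM's exact message.)
    if token_num + max_tokens > max_model_len:
        raise ValueError(
            f"This model's maximum context length is {max_model_len} tokens, "
            f"but {token_num} prompt tokens plus {max_tokens} completion "
            f"tokens were requested.")
    return max_tokens


if __name__ == "__main__":
    # A 4096-token prompt against a 4096-token context window with max_tokens
    # unset now fails fast instead of proceeding with a completion budget of
    # zero.
    try:
        resolve_max_tokens(token_num=4096, max_tokens=None, max_model_len=4096)
    except ValueError as exc:
        print(exc)
```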