[Bugfix] Fixing max token error message for openai compatible server (#4016)

Author: Jack Gordley
Date: 2024-04-23 12:06:29 +01:00
Committed by: GitHub
Parent: 62b8aebc6f
Commit: d3c8180ac4


@@ -206,6 +206,12 @@ class OpenAIServing:
         token_num = len(input_ids)
         if request.max_tokens is None:
+            if token_num >= self.max_model_len:
+                raise ValueError(
+                    f"This model's maximum context length is "
+                    f"{self.max_model_len} tokens. However, you requested "
+                    f"{token_num} tokens in the messages. "
+                    f"Please reduce the length of the messages.")
             request.max_tokens = self.max_model_len - token_num
         if token_num + request.max_tokens > self.max_model_len:
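
For context, here is a minimal, self-contained sketch of the validation flow this hunk implements: when the client omits max_tokens, default it to the remaining context budget, and raise the error this commit fixes when the prompt alone already fills the model's context window. The standalone function name resolve_max_tokens and its signature are illustrative assumptions, not vLLM's actual API; in the server the check lives on OpenAIServing and reads request.max_tokens and self.max_model_len directly.

    from typing import Optional


    def resolve_max_tokens(token_num: int, max_model_len: int,
                           requested_max_tokens: Optional[int]) -> int:
        """Return the effective max_tokens, raising if the prompt is too long."""
        if requested_max_tokens is None:
            # Prompt alone already fills or exceeds the context window.
            if token_num >= max_model_len:
                raise ValueError(
                    f"This model's maximum context length is {max_model_len} "
                    f"tokens. However, you requested {token_num} tokens in the "
                    f"messages. Please reduce the length of the messages.")
            # Default to whatever budget remains after the prompt.
            return max_model_len - token_num
        if token_num + requested_max_tokens > max_model_len:
            raise ValueError(
                f"This model's maximum context length is {max_model_len} "
                f"tokens. However, you requested "
                f"{token_num + requested_max_tokens} tokens ({token_num} in "
                f"the messages, {requested_max_tokens} in the completion). "
                f"Please reduce the length of the messages or completion.")
        return requested_max_tokens


    # Example: 4096-token context, 4000-token prompt, no max_tokens requested
    # -> defaults to the remaining 96 tokens instead of raising.
    print(resolve_max_tokens(token_num=4000, max_model_len=4096,
                             requested_max_tokens=None))  # 96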