[Bugfix] Include encoder prompt length in non-streaming API usage response (#8861)

This commit is contained in:
Pernekhan Utemuratov
2024-09-26 15:47:00 -07:00
committed by GitHub
parent d9cfbc891e
commit 93d364da34

View File

@@ -726,6 +726,8 @@ class OpenAIServingChat(OpenAIServing):
assert final_res.prompt_token_ids is not None
num_prompt_tokens = len(final_res.prompt_token_ids)
if final_res.encoder_prompt_token_ids is not None:
num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
num_generated_tokens = sum(
len(output.token_ids) for output in final_res.outputs)
usage = UsageInfo(