[Bugfix] Include encoder prompts len to non-stream api usage response (#8861)
commit 93d364da34 (parent d9cfbc891e), committed via GitHub
@@ -726,6 +726,8 @@ class OpenAIServingChat(OpenAIServing):
         assert final_res.prompt_token_ids is not None
         num_prompt_tokens = len(final_res.prompt_token_ids)
+        if final_res.encoder_prompt_token_ids is not None:
+            num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
         num_generated_tokens = sum(
             len(output.token_ids) for output in final_res.outputs)
         usage = UsageInfo(
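For context, below is a minimal, self-contained sketch of the token accounting this change fixes. FinalResult, CompletionOutput, and build_usage are hypothetical stand-ins, not vLLM's real classes; only the field names (prompt_token_ids, encoder_prompt_token_ids, outputs, token_ids) and the UsageInfo shape mirror the diff above. The point of the fix: for encoder-decoder models the request output also carries encoder prompt tokens, and before this commit they were omitted from prompt_tokens in the non-streaming usage response.

    # Hypothetical sketch of the fixed usage accounting; stand-in types only.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class CompletionOutput:  # stand-in for one generated sequence
        token_ids: list[int]

    @dataclass
    class FinalResult:  # stand-in for vLLM's per-request final output
        prompt_token_ids: list[int]
        encoder_prompt_token_ids: Optional[list[int]]
        outputs: list[CompletionOutput]

    @dataclass
    class UsageInfo:  # mirrors the shape of the OpenAI-style usage object
        prompt_tokens: int
        completion_tokens: int
        total_tokens: int

    def build_usage(final_res: FinalResult) -> UsageInfo:
        assert final_res.prompt_token_ids is not None
        num_prompt_tokens = len(final_res.prompt_token_ids)
        # The fix: encoder prompt tokens also count toward prompt usage.
        if final_res.encoder_prompt_token_ids is not None:
            num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
        num_generated_tokens = sum(
            len(output.token_ids) for output in final_res.outputs)
        return UsageInfo(
            prompt_tokens=num_prompt_tokens,
            completion_tokens=num_generated_tokens,
            total_tokens=num_prompt_tokens + num_generated_tokens,
        )

    if __name__ == "__main__":
        res = FinalResult(
            prompt_token_ids=[1, 2, 3],            # 3 decoder prompt tokens
            encoder_prompt_token_ids=[4, 5, 6, 7], # 4 encoder prompt tokens
            outputs=[CompletionOutput(token_ids=[8, 9])],
        )
        # prompt_tokens=7 (3 + 4), completion_tokens=2, total_tokens=9
        print(build_usage(res))

With the old code, prompt_tokens in this example would have reported 3, silently dropping the 4 encoder tokens from the billing-relevant totals.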