[Bugfix] Include encoder prompts len to non-stream api usage response (#8861)
commit 93d364da34 (parent d9cfbc891e), committed via GitHub
@@ -726,6 +726,8 @@ class OpenAIServingChat(OpenAIServing):
         assert final_res.prompt_token_ids is not None
         num_prompt_tokens = len(final_res.prompt_token_ids)
+        if final_res.encoder_prompt_token_ids is not None:
+            num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
         num_generated_tokens = sum(
             len(output.token_ids) for output in final_res.outputs)
         usage = UsageInfo(
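For context, below is a minimal, self-contained sketch of the token accounting this change fixes. FinalResult, CompletionOutput, and build_usage are hypothetical stand-ins, not vLLM's real classes; only the field names (prompt_token_ids, encoder_prompt_token_ids, outputs, token_ids) and the UsageInfo shape mirror the diff above. The point of the fix: for encoder-decoder models the request output also carries encoder prompt tokens, and before this commit they were omitted from prompt_tokens in the non-streaming usage response.

    # Hypothetical sketch of the fixed usage accounting; stand-in types only.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class CompletionOutput:  # stand-in for one generated sequence
        token_ids: list[int]

    @dataclass
    class FinalResult:  # stand-in for vLLM's per-request final output
        prompt_token_ids: list[int]
        encoder_prompt_token_ids: Optional[list[int]]
        outputs: list[CompletionOutput]

    @dataclass
    class UsageInfo:  # mirrors the shape of the OpenAI-style usage object
        prompt_tokens: int
        completion_tokens: int
        total_tokens: int

    def build_usage(final_res: FinalResult) -> UsageInfo:
        assert final_res.prompt_token_ids is not None
        num_prompt_tokens = len(final_res.prompt_token_ids)
        # The fix: encoder prompt tokens also count toward prompt usage.
        if final_res.encoder_prompt_token_ids is not None:
            num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
        num_generated_tokens = sum(
            len(output.token_ids) for output in final_res.outputs)
        return UsageInfo(
            prompt_tokens=num_prompt_tokens,
            completion_tokens=num_generated_tokens,
            total_tokens=num_prompt_tokens + num_generated_tokens,
        )

    if __name__ == "__main__":
        res = FinalResult(
            prompt_token_ids=[1, 2, 3],            # 3 decoder prompt tokens
            encoder_prompt_token_ids=[4, 5, 6, 7], # 4 encoder prompt tokens
            outputs=[CompletionOutput(token_ids=[8, 9])],
        )
        # prompt_tokens=7 (3 + 4), completion_tokens=2, total_tokens=9
        print(build_usage(res))

With the old code, prompt_tokens in this example would have reported 3, silently dropping the 4 encoder tokens from the billing-relevant totals.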