[Bugfix] Apply same sampling parameters for both n=1 and n>1 (#26005)

Signed-off-by: Kenichi Maehashi <maehashi@preferred.jp>
2025-10-20 14:53:52 +08:00 · 2025-10-01 23:37:35 +09:00
parent f9e714813a
commit 3b7c20a6b5
1 changed file with 1 addition and 1 deletion
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -290,7 +290,7 @@ class AsyncLLM(EngineClient):
            return queue

        # Fan out child requests (for n>1).
-        parent_request = ParentRequest(request_id, params)
+        parent_request = ParentRequest(request_id, request.sampling_params)
        for idx in range(params.n):
            request_id, params = parent_request.get_child_info(idx)
            child_request = request if idx == params.n - 1 else copy(request)