Pipeline Parallel: Guard for KeyErrors at request abort (#6587)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
2025-10-20 14:53:52 +08:00 · 2024-07-19 20:18:19 -06:00
parent 7bd82002ae
commit 3f8d42c81f
2 changed files with 9 additions and 2 deletions
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -131,7 +131,10 @@ class RequestTracker:
        """Process a request output from the engine."""
        request_id = request_output.request_id

-        self._request_streams[request_id].put(request_output)
+        # Guard against a KeyError which can occur if the request was aborted
+        # while the output was generated
+        if (stream := self._request_streams.get(request_id)) is not None:
+            stream.put(request_output)
        if request_output.finished:
            if verbose:
                logger.info("Finished request %s.", request_id)
--- a/vllm/engine/output_processor/single_step.py
+++ b/vllm/engine/output_processor/single_step.py
@@ -90,7 +90,11 @@ class SingleStepOutputProcessor(SequenceGroupOutputProcessor):
            for parent_seq in parent_seqs
        }
        for sample in samples:
-            parent_child_dict[sample.parent_seq_id].append(sample)
+            # Guard against a KeyError which can occur if the request was
+            # aborted while the output was generated
+            if (child_list :=
+                    parent_child_dict.get(sample.parent_seq_id)) is not None:
+                child_list.append(sample)
        # List of (child, parent)
        child_seqs: List[Tuple[Sequence, Sequence]] = []