mirror of https://github.com/vllm-project/vllm.git
Signed-off-by: youkaichao <youkaichao@gmail.com>
@@ -455,7 +455,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
         self.enable_prompt_adapter = (self.runner.prompt_adapter_config
                                       is not None)
         self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
-        self.decode_only = True
 
         # Attention metadata inputs.
         if self.attn_backend is not None:
@@ -477,6 +476,10 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
                 finished_requests_ids: Optional[List[str]] = None) -> None:
         self.finished_requests_ids = finished_requests_ids
 
+        # if the current batch is decode-only.
+        # will be set to False if there is any non-decode request.
+        self.decode_only = True
+
         # Intermediate data (data in CPU before going to GPU) for
         # the current sequence group.
         self.inter_data_list: List[
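The change moves the `self.decode_only = True` reset out of `__init__` (first hunk) and into `prepare()` (second hunk), so the flag is re-initialized at the start of every batch rather than once per builder object. Below is a minimal, self-contained sketch of why that matters when a builder is reused across batches; `BatchBuilder` and `Request` are hypothetical names used only for illustration, not vLLM's actual classes.

```python
# Sketch of the pattern this diff applies: per-batch state must be reset in
# prepare(), not only in __init__, because the builder object is reused
# across batches. Names here are invented for illustration.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Request:
    is_decode: bool


class BatchBuilder:

    def __init__(self):
        # One-time wiring lives here; __init__ runs once per builder object,
        # so it is the wrong place to (only) reset per-batch flags.
        self.requests: List[Request] = []
        self.decode_only = True

    def prepare(self, finished_request_ids: Optional[List[str]] = None) -> None:
        # Reset ALL per-batch state at the start of every batch. If
        # decode_only were set only in __init__, a prefill request in batch N
        # would leave it False for every later batch built by this object.
        self.finished_request_ids = finished_request_ids
        self.requests = []
        self.decode_only = True

    def add_request(self, req: Request) -> None:
        self.requests.append(req)
        if not req.is_decode:
            self.decode_only = False


builder = BatchBuilder()

builder.prepare()
builder.add_request(Request(is_decode=False))  # prefill flips the flag
assert builder.decode_only is False

builder.prepare()  # new batch: the flag must come back to True
builder.add_request(Request(is_decode=True))
assert builder.decode_only is True
```

With the reset in `prepare()`, a non-decode request in one batch can no longer leave `decode_only` stuck at False for every subsequent batch built by the same object.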