mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Frontend] Add tqdm_leave_pbar to control progress bar visibility (#19357)
Signed-off-by: reidliu41 <reid201711@gmail.com> Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
@ -281,7 +281,7 @@ class LLM:
|
||||
sampling_params: Optional[Union[SamplingParams,
|
||||
Sequence[SamplingParams]]] = None,
|
||||
*,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -297,7 +297,7 @@ class LLM:
|
||||
sampling_params: Optional[Union[SamplingParams,
|
||||
list[SamplingParams]]] = None,
|
||||
prompt_token_ids: Optional[list[int]] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -313,7 +313,7 @@ class LLM:
|
||||
sampling_params: Optional[Union[SamplingParams,
|
||||
list[SamplingParams]]] = None,
|
||||
prompt_token_ids: Optional[list[list[int]]] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -330,7 +330,7 @@ class LLM:
|
||||
list[SamplingParams]]] = None,
|
||||
*,
|
||||
prompt_token_ids: list[int],
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -347,7 +347,7 @@ class LLM:
|
||||
list[SamplingParams]]] = None,
|
||||
*,
|
||||
prompt_token_ids: list[list[int]],
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -362,7 +362,7 @@ class LLM:
|
||||
prompts: None,
|
||||
sampling_params: None,
|
||||
prompt_token_ids: Union[list[int], list[list[int]]],
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -382,7 +382,7 @@ class LLM:
|
||||
sampling_params: Optional[Union[SamplingParams,
|
||||
Sequence[SamplingParams]]] = None,
|
||||
prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
guided_options_request: Optional[Union[LLMGuidedOptions,
|
||||
@ -404,7 +404,10 @@ class LLM:
|
||||
When it is a single value, it is applied to every prompt.
|
||||
When it is a list, the list must have the same length as the
|
||||
prompts and it is paired one by one with the prompt.
|
||||
use_tqdm: Whether to use tqdm to display the progress bar.
|
||||
use_tqdm: If `True`, shows a tqdm progress bar.
|
||||
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
|
||||
it is used to create the progress bar.
|
||||
If `False`, no progress bar is created.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
prompt_adapter_request: Prompt Adapter request to use for
|
||||
generation, if any.
|
||||
@ -678,7 +681,7 @@ class LLM:
|
||||
list[list[ChatCompletionMessageParam]]],
|
||||
sampling_params: Optional[Union[SamplingParams,
|
||||
list[SamplingParams]]] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
chat_template: Optional[str] = None,
|
||||
chat_template_content_format: ChatTemplateContentFormatOption = "auto",
|
||||
@ -709,7 +712,10 @@ class LLM:
|
||||
is a single value, it is applied to every prompt. When it
|
||||
is a list, the list must have the same length as the
|
||||
prompts and it is paired one by one with the prompt.
|
||||
use_tqdm: Whether to use tqdm to display the progress bar.
|
||||
use_tqdm: If `True`, shows a tqdm progress bar.
|
||||
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
|
||||
it is used to create the progress bar.
|
||||
If `False`, no progress bar is created.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
chat_template: The template to use for structuring the chat.
|
||||
If not provided, the model's default chat template will be used.
|
||||
@ -823,7 +829,7 @@ class LLM:
|
||||
Sequence[PoolingParams]]] = None,
|
||||
*,
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -838,7 +844,7 @@ class LLM:
|
||||
Sequence[PoolingParams]]] = None,
|
||||
prompt_token_ids: Optional[list[int]] = None,
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -853,7 +859,7 @@ class LLM:
|
||||
Sequence[PoolingParams]]] = None,
|
||||
prompt_token_ids: Optional[list[list[int]]] = None,
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -869,7 +875,7 @@ class LLM:
|
||||
*,
|
||||
prompt_token_ids: list[int],
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -885,7 +891,7 @@ class LLM:
|
||||
*,
|
||||
prompt_token_ids: list[list[int]],
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -899,7 +905,7 @@ class LLM:
|
||||
pooling_params: None,
|
||||
prompt_token_ids: Union[list[int], list[list[int]]],
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -918,7 +924,7 @@ class LLM:
|
||||
Sequence[PoolingParams]]] = None,
|
||||
prompt_token_ids: Optional[Union[list[int], list[list[int]]]] = None,
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
@ -935,7 +941,10 @@ class LLM:
|
||||
for more details about the format of each prompts.
|
||||
pooling_params: The pooling parameters for pooling. If None, we
|
||||
use the default pooling parameters.
|
||||
use_tqdm: Whether to use tqdm to display the progress bar.
|
||||
use_tqdm: If `True`, shows a tqdm progress bar.
|
||||
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
|
||||
it is used to create the progress bar.
|
||||
If `False`, no progress bar is created.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
prompt_adapter_request: Prompt Adapter request to use for
|
||||
generation, if any.
|
||||
@ -1005,7 +1014,7 @@ class LLM:
|
||||
/,
|
||||
*,
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
pooling_params: Optional[Union[PoolingParams,
|
||||
Sequence[PoolingParams]]] = None,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
@ -1024,7 +1033,10 @@ class LLM:
|
||||
for more details about the format of each prompts.
|
||||
pooling_params: The pooling parameters for pooling. If None, we
|
||||
use the default pooling parameters.
|
||||
use_tqdm: Whether to use tqdm to display the progress bar.
|
||||
use_tqdm: If `True`, shows a tqdm progress bar.
|
||||
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
|
||||
it is used to create the progress bar.
|
||||
If `False`, no progress bar is created.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
prompt_adapter_request: Prompt Adapter request to use for
|
||||
generation, if any.
|
||||
@ -1051,7 +1063,7 @@ class LLM:
|
||||
prompts: Union[PromptType, Sequence[PromptType]],
|
||||
/,
|
||||
*,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[ClassificationRequestOutput]:
|
||||
@ -1066,7 +1078,10 @@ class LLM:
|
||||
prompts: The prompts to the LLM. You may pass a sequence of prompts
|
||||
for batch inference. See [PromptType][vllm.inputs.PromptType]
|
||||
for more details about the format of each prompts.
|
||||
use_tqdm: Whether to use tqdm to display the progress bar.
|
||||
use_tqdm: If `True`, shows a tqdm progress bar.
|
||||
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
|
||||
it is used to create the progress bar.
|
||||
If `False`, no progress bar is created.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
prompt_adapter_request: Prompt Adapter request to use for
|
||||
generation, if any.
|
||||
@ -1092,7 +1107,7 @@ class LLM:
|
||||
text_1: list[Union[str, TextPrompt, TokensPrompt]],
|
||||
text_2: list[Union[str, TextPrompt, TokensPrompt]],
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[ScoringRequestOutput]:
|
||||
@ -1126,7 +1141,7 @@ class LLM:
|
||||
text_1: list[str],
|
||||
text_2: list[str],
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[ScoringRequestOutput]:
|
||||
@ -1178,7 +1193,7 @@ class LLM:
|
||||
/,
|
||||
*,
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: bool = True,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
|
||||
) -> list[ScoringRequestOutput]:
|
||||
@ -1198,7 +1213,10 @@ class LLM:
|
||||
text_2: The texts to pair with the query to form the input
|
||||
to the LLM. See [PromptType][vllm.inputs.PromptType] for
|
||||
more details about the format of each prompts.
|
||||
use_tqdm: Whether to use tqdm to display the progress bar.
|
||||
use_tqdm: If `True`, shows a tqdm progress bar.
|
||||
If a callable (e.g., `functools.partial(tqdm, leave=False)`),
|
||||
it is used to create the progress bar.
|
||||
If `False`, no progress bar is created.
|
||||
lora_request: LoRA request to use for generation, if any.
|
||||
prompt_adapter_request: Prompt Adapter request to use for
|
||||
generation, if any.
|
||||
@ -1379,7 +1397,7 @@ class LLM:
|
||||
params: Union[SamplingParams, Sequence[SamplingParams], PoolingParams,
|
||||
Sequence[PoolingParams]],
|
||||
*,
|
||||
use_tqdm: bool,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[Sequence[LoRARequest], LoRARequest]],
|
||||
prompt_adapter_request: Optional[PromptAdapterRequest],
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
@ -1417,7 +1435,8 @@ class LLM:
|
||||
# Add requests to the engine.
|
||||
it = prompts
|
||||
if use_tqdm:
|
||||
it = tqdm(it, desc="Adding requests")
|
||||
tqdm_func = use_tqdm if callable(use_tqdm) else tqdm
|
||||
it = tqdm_func(it, desc="Adding requests")
|
||||
|
||||
for i, prompt in enumerate(it):
|
||||
self._add_request(
|
||||
@ -1474,12 +1493,15 @@ class LLM:
|
||||
return params
|
||||
|
||||
def _run_engine(
|
||||
self, *, use_tqdm: bool
|
||||
self,
|
||||
*,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True
|
||||
) -> list[Union[RequestOutput, PoolingRequestOutput]]:
|
||||
# Initialize tqdm.
|
||||
if use_tqdm:
|
||||
num_requests = self.llm_engine.get_num_unfinished_requests()
|
||||
pbar = tqdm(
|
||||
tqdm_func = use_tqdm if callable(use_tqdm) else tqdm
|
||||
pbar = tqdm_func(
|
||||
total=num_requests,
|
||||
desc="Processed prompts",
|
||||
dynamic_ncols=True,
|
||||
|
Reference in New Issue
Block a user