[Frontend] Add template related params to request (#5709)
@@ -6,7 +6,7 @@ numpy < 2.0.0
 requests
 tqdm
 py-cpuinfo
-transformers >= 4.42.0 # Required for Gemma 2.
+transformers >= 4.42.0 # Required for Gemma 2 and for additional chat template parameters.
 tokenizers >= 0.19.1 # Required for Llama 3.
 fastapi
 aiohttp
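The updated comment reflects that the additional chat template parameters used by this change (the tools and documents arguments to apply_chat_template) only exist in transformers 4.42.0 and later. A minimal sketch of a guard that surfaces the requirement early; the error message is illustrative:

# Minimal sketch: fail fast when the installed transformers predates the
# chat-template parameters (tools/documents) that this change relies on.
from packaging import version
import transformers

if version.parse(transformers.__version__) < version.parse("4.42.0"):
    raise RuntimeError(
        "transformers >= 4.42.0 is required for per-request chat template "
        "parameters (tools/documents)")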
@@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
             "special tokens so this should be set to False (as is the "
             "default)."),
     )
+    documents: Optional[List[Dict[str, str]]] = Field(
+        default=None,
+        description=
+        ("A list of dicts representing documents that will be accessible to "
+         "the model if it is performing RAG (retrieval-augmented generation)."
+         " If the template does not support RAG, this argument will have no "
+         "effect. We recommend that each document should be a dict containing "
+         "\"title\" and \"text\" keys."),
+    )
+    chat_template: Optional[str] = Field(
+        default=None,
+        description=(
+            "A Jinja template to use for this conversion. "
+            "If this is not passed, the model's default chat template will be "
+            "used instead."),
+    )
+    chat_template_kwargs: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description=("Additional kwargs to pass to the template renderer. "
+                     "Will be accessible by the chat template."),
+    )
     include_stop_str_in_output: Optional[bool] = Field(
         default=False,
         description=(
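With these fields on ChatCompletionRequest, templating can be steered per request. A minimal client-side sketch using the OpenAI Python client against a vLLM server; fields that are not part of the standard OpenAI API are passed through extra_body, and the base URL, model name, document contents, and the extra template variable are placeholders:

# Sketch: passing the new per-request template fields from a client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model name
    messages=[{"role": "user", "content": "What does the report conclude?"}],
    extra_body={
        # Only takes effect if the model's chat template supports RAG.
        "documents": [{"title": "Q2 report",
                       "text": "Revenue grew 12% quarter over quarter."}],
        # Hypothetical variable; keys given here become visible to the
        # Jinja chat template during rendering.
        "chat_template_kwargs": {"use_system_prompt": False},
    },
)
print(response.choices[0].message.content)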
@@ -218,10 +218,18 @@ class OpenAIServingChat(OpenAIServing):
                conversation.extend(chat_parsed_result.messages)
                image_futures.extend(chat_parsed_result.image_futures)

            tool_dicts = None if request.tools is None else [
                tool.model_dump() for tool in request.tools
            ]

            prompt = self.tokenizer.apply_chat_template(
                conversation=conversation,
                tokenize=False,
                add_generation_prompt=request.add_generation_prompt,
                tools=tool_dicts,
                documents=request.documents,
                chat_template=request.chat_template,
                **(request.chat_template_kwargs or {}),
            )
        except Exception as e:
            logger.error("Error in applying chat template from request: %s", e)
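The upshot is that request-supplied tools, documents, a per-request template override, and arbitrary template kwargs are all forwarded to the tokenizer's renderer. A standalone sketch of the equivalent call against transformers directly (requires transformers >= 4.42.0; the model name is a placeholder for any checkpoint whose chat template understands documents, and the extra kwarg is hypothetical):

# Sketch: the same apply_chat_template call outside the server, showing what
# each forwarded request field controls.
from transformers import AutoTokenizer

# Placeholder: any tokenizer whose chat template supports RAG-style documents.
tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")

conversation = [{"role": "user", "content": "Summarize the attached report."}]
documents = [{"title": "Q2 report",
              "text": "Revenue grew 12% quarter over quarter."}]
chat_template_kwargs = {"add_preamble": False}  # hypothetical template variable

prompt = tokenizer.apply_chat_template(
    conversation=conversation,
    tokenize=False,
    add_generation_prompt=True,
    tools=None,            # request.tools dumped to plain dicts, or None
    documents=documents,   # ignored by templates without RAG support
    chat_template=None,    # optional per-request Jinja override
    **chat_template_kwargs,
)
print(prompt)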