[Frontend] Add template related params to request (#5709)

This commit is contained in:
danieljannai21
2024-07-02 09:01:57 +03:00
committed by GitHub
parent 3476ed0809
commit 2c37540aa6
3 changed files with 30 additions and 1 deletions

View File

@ -6,7 +6,7 @@ numpy < 2.0.0
requests
tqdm
py-cpuinfo
transformers >= 4.42.0 # Required for Gemma 2.
transformers >= 4.42.0 # Required for Gemma 2 and for additional chat template parameters.
tokenizers >= 0.19.1 # Required for Llama 3.
fastapi
aiohttp

View File

@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
"special tokens so this should be set to False (as is the "
"default)."),
)
# Optional list of string-to-string dicts made available to the chat
# template (for RAG-capable templates); None when the client sends none.
documents: Optional[List[Dict[str, str]]] = Field(
default=None,
description=
("A list of dicts representing documents that will be accessible to "
"the model if it is performing RAG (retrieval-augmented generation)."
" If the template does not support RAG, this argument will have no "
"effect. We recommend that each document should be a dict containing "
"\"title\" and \"text\" keys."),
)
# Optional per-request Jinja template source. Defaults to None, in which
# case (per the description) the model's default chat template is used.
chat_template: Optional[str] = Field(
default=None,
description=(
"A Jinja template to use for this conversion. "
"If this is not passed, the model's default chat template will be "
"used instead."),
)
# Optional free-form keyword arguments for the template renderer.
# NOTE(review): values are typed as Any and the keys are not constrained
# here — confirm whether keys that clash with arguments the server already
# passes to the renderer should be rejected during request validation.
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the template renderer. "
"Will be accessible by the chat template."),
)
include_stop_str_in_output: Optional[bool] = Field(
default=False,
description=(

View File

@ -218,10 +218,18 @@ class OpenAIServingChat(OpenAIServing):
conversation.extend(chat_parsed_result.messages)
image_futures.extend(chat_parsed_result.image_futures)
# Dump each requested tool with model_dump() so the template receives
# plain data; stays None when the request carries no tools.
tool_dicts = None if request.tools is None else [
tool.model_dump() for tool in request.tools
]
# Render the collected conversation through the tokenizer's chat template.
# tokenize=False asks for untokenized output rather than token ids.
prompt = self.tokenizer.apply_chat_template(
conversation=conversation,
tokenize=False,
add_generation_prompt=request.add_generation_prompt,
tools=tool_dicts,
documents=request.documents,
chat_template=request.chat_template,
# NOTE(review): request-supplied kwargs are unpacked next to the explicit
# keywords above. A key that repeats one of them (e.g. "tokenize" or
# "tools") makes this call raise a duplicate-keyword TypeError, which is
# then handled by the enclosing `except Exception` — confirm that is the
# intended way to reject such requests.
**(request.chat_template_kwargs or {}),
)
except Exception as e:
logger.error("Error in applying chat template from request: %s", e)