mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
Add chat template for Llama 4 models (#16428)
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
committed by
GitHub
parent
fe92176321
commit
05e1fbfc52
@ -152,10 +152,11 @@ Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_cha
|
||||
|
||||
Supported models:
|
||||
|
||||
All Llama 3.1 and 3.2 models should be supported.
|
||||
All Llama 3.1, 3.2 and 4 models should be supported.
|
||||
|
||||
* `meta-llama/Llama-3.1-*`
|
||||
* `meta-llama/Llama-3.2-*`
|
||||
* `meta-llama/Llama-4-*`
|
||||
|
||||
The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) introduced by the Llama-3.2 models, see the `pythonic` tool parser below.
|
||||
|
||||
@ -176,6 +177,12 @@ images.
|
||||
|
||||
Recommended flags: `--tool-call-parser llama3_json --chat-template {see_above}`
|
||||
|
||||
vLLM also provides a JSON-based chat template for Llama 4:
|
||||
* `examples/tool_chat_template_llama4_json.jinja` - this is based on the "official" chat template for the Llama 4
|
||||
models, but tweaked so that it works better with vLLM.
|
||||
|
||||
For Llama 4, use `--tool-call-parser llama4_json --chat-template examples/tool_chat_template_llama4_json.jinja`.
|
||||
|
||||
#### IBM Granite
|
||||
|
||||
Supported models:
|
||||
|
116
examples/tool_chat_template_llama4_json.jinja
Normal file
116
examples/tool_chat_template_llama4_json.jinja
Normal file
@ -0,0 +1,116 @@
|
||||
{#- Print 'True' iff *var* is a non-string iterable whose items all carry a
    'type' key (i.e. a multimodal content list); print 'False' otherwise.
    Callers compare the rendered output against the string 'True'.

    Fix: the original did `{%- set valid = false -%}` inside the for-loop, but
    in Jinja2 a `set` inside a loop body does not escape the loop scope, so
    `valid` was always True for any iterable. It also used `{%- break -%}`,
    which requires the non-default `jinja2.ext.loopcontrols` extension.
    A `namespace()` object is the documented way to mutate state across loop
    iterations, and no `break` is needed. -#}
{%- macro is_array_of_type_objects(var) -%}
    {%- if var is iterable and var is not string -%}
        {%- set ns = namespace(valid=true) -%}
        {%- for item in var -%}
            {#- Guard with `is mapping` so a stray non-dict item yields False
                instead of raising inside the `in` test. -#}
            {%- if item is not mapping or 'type' not in item -%}
                {%- set ns.valid = false -%}
            {%- endif -%}
        {%- endfor -%}
        {{ ns.valid }}
    {%- else -%}
        {{ false }}
    {%- endif -%}
{%- endmacro %}
{#- Render one chat message's content:
    - a plain string is trimmed and emitted as-is;
    - a list of {'type': ...} parts emits '<|image|>' for image parts and the
      trimmed text for text parts;
    - anything else is serialized as JSON (e.g. tool-result payloads).

    Fix: the original tested `is_array_of_type_objects(data)` — `data` is not
    defined anywhere in this template (the macro's parameter is `message`), so
    rendering a list-typed content raised an UndefinedError. The macro must be
    called on `message['content']`. -#}
{%- macro render_message(message) %}
    {%- if message['content'] is string %}
        {{- message['content']|trim }}
    {%- elif is_array_of_type_objects(message['content']) == 'True' %}
        {%- for content in message['content'] %}
            {%- if content['type'] == 'image' %}
                {{- '<|image|>' }}
            {%- elif content['type'] == 'text' %}
                {{- content['text']|trim }}
            {%- endif %}
        {%- endfor %}
    {%- else %}
        {{- message['content']|tojson }}
    {%- endif %}
{%- endmacro %}
{#- ===== Llama 4 JSON tool-calling prompt assembly =====
    Reconstructed from a diff-scrape that interleaved table junk ('|', '||||')
    between every template line, making the file invalid. Logic, runtime
    strings and whitespace-control markers are preserved byte-for-byte. -#}
{{- bos_token }}
{#- Allow tools to be passed under the legacy `custom_tools` name. -#}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0] %}
    {%- set messages = messages[1:] %}
{%- else %}
    {#- Default system message when the caller supplied none. Adjacent string
        literals are concatenated by the Jinja2 parser, as in Python. -#}
    {%- set system_message = ({ "content": "You are a helpful assistant with tool calling "
         "capabilities. Only reply with a tool call if the function exists in the "
         "library provided by the user. If it doesn't exist, just reply directly in "
         "natural language. When you receive a tool call response, use the output to "
         "format an answer to the original user question."}) %}
{%- endif %}

{#- Guidance text prepended to the tool library, wherever it is injected. -#}
{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one '
    'or more function/tool calls to fulfill the task. \n'
    'If none are needed, then proceed to the response.\n\n'
    'Tool Call Syntax: You can call tools using the following syntax:\n'
    '{"name": function name, "parameters": dictionary of argument name and its value}.\n'
    'Separate multiple function calls by "; ". Do not use variables.\n'
    'Do not include anything else when calling the tools with the syntax above.\n\n'
    'Here is a list of functions in JSON format that you can invoke.\n' %}

{{- "<|header_start|>system<|header_end|>\n\n" }}
{%- if tools is not none and not tools_in_user_message %}
    {{- tool_lib_preamble }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
{%- endif %}
{{- render_message(system_message) }}
{#- NOTE(review): this tag deliberately has no leading '-', so the source
    newline before it is emitted between the system content and <|eot|> —
    presumably intentional; confirm against the official template. -#}
{{ "<|eot|>\n" }}

{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and not tools is none %}
    {#- Extract the first user message so we can plug it in here #}
    {%- if messages | length != 0 %}
        {%- set first_user_message = messages[0] %}
        {%- set messages = messages[1:] %}
    {%- else %}
        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
    {%- endif %}
    {{- '<|header_start|>user<|header_end|>\n\n' }}
    {{- tool_lib_preamble }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
    {{- render_message(first_user_message) + "\n<|eot|>"}}
{%- endif %}

{#- Remaining conversation: plain turns, assistant tool calls, tool results. -#}
{%- for message in messages %}
    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
        {{- render_message(message) }}
        {{- "\n<|eot|>" }}
    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
        {{- '\n<|header_start|>assistant<|header_end|>\n\n' -}}
        {{- render_message(message) }}
        {#- Emit each call in the JSON syntax announced in the preamble. -#}
        {%- for tool_call in message.tool_calls %}
            {{- '{"name": "' + tool_call.function.name + '", ' }}
            {{- '"parameters": ' }}
            {{- tool_call.function.arguments | tojson }}
            {{- "}" }}
        {%- endfor %}
        {{- "\n<|eot|>" }}
    {%- elif message.role == "tool" or message.role == "ipython" %}
        {#- Tool results are always rendered under the 'ipython' role. -#}
        {{- "\n<|header_start|>ipython<|header_end|>\n\n" }}
        {{- render_message(message) }}
        {{- "\n<|eom|>" }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '\n<|header_start|>assistant<|header_end|>\n\n' }}
{%- endif %}
@ -98,6 +98,20 @@ CONFIGS: dict[str, ServerConfig] = {
|
||||
"extended":
|
||||
True
|
||||
},
|
||||
"llama4_json": {
|
||||
"model":
|
||||
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
||||
"arguments": [
|
||||
"--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
|
||||
"--distributed-executor-backend", "mp", "--tool-call-parser",
|
||||
"llama4_json", "--chat-template",
|
||||
str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja")
|
||||
],
|
||||
"supports_parallel":
|
||||
True,
|
||||
"extended":
|
||||
True
|
||||
},
|
||||
"mistral": {
|
||||
"model":
|
||||
"mistralai/Mistral-7B-Instruct-v0.3",
|
||||
|
@ -27,6 +27,7 @@ logger = init_logger(__name__)
|
||||
|
||||
|
||||
@ToolParserManager.register_module("llama3_json")
|
||||
@ToolParserManager.register_module("llama4_json")
|
||||
class Llama3JsonToolParser(ToolParser):
|
||||
"""
|
||||
Tool call parser for Llama 3.1 models intended for use with the
|
||||
|
Reference in New Issue
Block a user