Add chat template for Llama 4 models (#16428)

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Author: Maximilien de Bayser
Date: 2025-04-24 17:19:36 -03:00
Committed by: GitHub
parent fe92176321
commit 05e1fbfc52
4 changed files with 139 additions and 1 deletion


@@ -152,10 +152,11 @@ Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_cha
Supported models:
All Llama 3.1 and 3.2 models should be supported.
All Llama 3.1, 3.2 and 4 models should be supported.
* `meta-llama/Llama-3.1-*`
* `meta-llama/Llama-3.2-*`
* `meta-llama/Llama-4-*`
The tool calling that is supported is the [JSON based tool calling](https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#json-based-tool-calling). For [pythonic tool calling](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#zero-shot-function-calling) introduced by the Llama-3.2 models, see the `pythonic` tool parser below.
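In this format, a model-emitted call looks like `{"name": "get_weather", "parameters": {"city": "Paris"}}` (tool name and arguments here are illustrative).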
@@ -176,6 +177,12 @@ images.
Recommended flags: `--tool-call-parser llama3_json --chat-template {see_above}`
vLLM also provides a JSON-based chat template for Llama 4:
* `examples/tool_chat_template_llama4_json.jinja` - this is based on the "official" chat template for the Llama 4
models, but tweaked so that it works better with vLLM.
For Llama 4, use `--tool-call-parser llama4_json --chat-template examples/tool_chat_template_llama4_json.jinja`.
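As a quick illustration, here is a minimal client-side sketch of a tool-calling round trip, assuming a server launched with the flags above plus `--enable-auto-tool-choice`; the `get_weather` tool, schema, and base URL are hypothetical, not part of this commit:

```python
# Hedged sketch: exercises the llama4_json parser through the
# OpenAI-compatible API. Tool name/schema and base_url are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

resp = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)

# If the parser recognized a tool call in the model output, it comes back
# as structured tool_calls rather than plain text.
print(resp.choices[0].message.tool_calls)
```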
#### IBM Granite
Supported models:


@@ -0,0 +1,116 @@
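{#- Helper macro: emits "True" only when var is a non-string iterable whose items all carry a 'type' key (OpenAI-style content parts). #}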
{%- macro is_array_of_type_objects(var) -%}
{%- if var is iterable and var is not string -%}
{#- A namespace is needed so the assignment inside the loop is visible outside it. #}
{%- set ns = namespace(valid=true) -%}
{%- for item in var -%}
{%- if 'type' not in item -%}
{%- set ns.valid = false -%}
{%- break -%}
{%- endif -%}
{%- endfor -%}
{{ ns.valid }}
{%- else -%}
{{ false }}
{%- endif -%}
{%- endmacro %}
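{#- Render a message body: trim plain strings, expand typed content parts (images become <|image|>), and fall back to JSON serialization. #}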
{%- macro render_message(message) %}
{%- if message['content'] is string %}
{{- message['content']|trim }}
{%- elif is_array_of_type_objects(message['content']) == 'True' %}
{%- for content in message['content'] %}
{%- if content['type'] == 'image' %}
{{- '<|image|>' }}
{%- elif content['type'] == 'text' %}
{{- content['text']|trim }}
{%- endif %}
{%- endfor %}
{%- else %}
{{- message['content']|tojson }}
{%- endif %}
{%- endmacro %}
{{- bos_token }}
{%- if custom_tools is defined %}
{%- set tools = custom_tools %}
{%- endif %}
{%- if tools_in_user_message is not defined %}
{%- set tools_in_user_message = true %}
{%- endif %}
{%- if tools is not defined %}
{%- set tools = none %}
{%- endif %}
{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
{%- set system_message = messages[0] %}
{%- set messages = messages[1:] %}
{%- else %}
{%- set system_message = ({ "content": "You are a helpful assistant with tool calling "
"capabilities. Only reply with a tool call if the function exists in the "
"library provided by the user. If it doesn't exist, just reply directly in "
"natural language. When you receive a tool call response, use the output to "
"format an answer to the original user question."}) %}
{%- endif %}
{%- set tool_lib_preamble = 'Tools: You have access to the following tools. You might need to use one '
'or more function/tool calls to fulfill the task. \n'
'If none are needed, then proceed to the response.\n\n'
'Tool Call Syntax: You can call tools using the following syntax:\n'
'{"name": function name, "parameters": dictionary of argument name and its value}.\n'
'Separate multiple function calls by "; ". Do not use variables.\n'
'Do not include anything else when calling the tools with the syntax above.\n\n'
'Here is a list of functions in JSON format that you can invoke.\n' %}
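{#- Start the system turn; the tool library is included here only when it is not passed in the first user message. #}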
{{- "<|header_start|>system<|header_end|>\n\n" }}
{%- if tools is not none and not tools_in_user_message %}
{{- tool_lib_preamble }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{%- endif %}
{{- render_message(system_message) }}
{{ "<|eot|>\n" }}
{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and tools is not none %}
{#- Extract the first user message so we can plug it in here #}
{%- if messages | length != 0 %}
{%- set first_user_message = messages[0] %}
{%- set messages = messages[1:] %}
{%- else %}
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
{%- endif %}
{{- '<|header_start|>user<|header_end|>\n\n' }}
{{- tool_lib_preamble }}
{%- for t in tools %}
{{- t | tojson(indent=4) }}
{{- "\n\n" }}
{%- endfor %}
{{- render_message(first_user_message) + "\n<|eot|>"}}
{%- endif %}
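{#- Render the remaining conversation turns: plain messages, assistant tool calls, and tool results. #}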
{%- for message in messages %}
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
{{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
{{- render_message(message) }}
{{- "\n<|eot|>" }}
{%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
{{- '\n<|header_start|>assistant<|header_end|>\n\n' -}}
{{- render_message(message) }}
{%- for tool_call in message.tool_calls %}
{{- '{"name": "' + tool_call.function.name + '", ' }}
{{- '"parameters": ' }}
{{- tool_call.function.arguments | tojson }}
{{- "}" }}
{%- endfor %}
{{- "\n<|eot|>" }}
{%- elif message.role == "tool" or message.role == "ipython" %}
{{- "\n<|header_start|>ipython<|header_end|>\n\n" }}
{{- render_message(message) }}
{{- "\n<|eom|>" }}
{%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '\n<|header_start|>assistant<|header_end|>\n\n' }}
{%- endif %}
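To sanity-check the template outside of vLLM, it can be rendered directly with Jinja2; the `{%- break -%}` above requires Jinja2's `loopcontrols` extension, so enable it when rendering standalone. A minimal sketch, in which the message, tool, `bos_token` value, and file path are illustrative assumptions:

```python
# Standalone rendering sketch for the template above; inputs are
# illustrative, not part of the commit.
import jinja2

env = jinja2.Environment(extensions=["jinja2.ext.loopcontrols"])
with open("examples/tool_chat_template_llama4_json.jinja") as f:
    template = env.from_string(f.read())

prompt = template.render(
    bos_token="<|begin_of_text|>",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[{"name": "get_weather",
            "parameters": {"type": "object",
                           "properties": {"city": {"type": "string"}}}}],
    add_generation_prompt=True,
)
print(prompt)
```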


@@ -98,6 +98,20 @@ CONFIGS: dict[str, ServerConfig] = {
"extended":
True
},
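# Extended test entry exercising the llama4_json parser with the new chat template.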
"llama4_json": {
"model":
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
"arguments": [
"--enforce-eager", "--no-enable-prefix-caching", "-tp", "4",
"--distributed-executor-backend", "mp", "--tool-call-parser",
"llama4_json", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_llama4_json.jinja")
],
"supports_parallel":
True,
"extended":
True
},
"mistral": {
"model":
"mistralai/Mistral-7B-Instruct-v0.3",


@@ -27,6 +27,7 @@ logger = init_logger(__name__)
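# Llama 4 emits the same JSON tool-call format as Llama 3.1, so it reuses this parser.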
@ToolParserManager.register_module("llama3_json")
@ToolParserManager.register_module("llama4_json")
class Llama3JsonToolParser(ToolParser):
"""
Tool call parser for Llama 3.1 models intended for use with the