From 1037f633113a9e08f9fdf11339703cf9a28c9291 Mon Sep 17 00:00:00 2001 From: Yaowei Zheng Date: Wed, 15 Oct 2025 15:00:36 +0800 Subject: [PATCH] [model] add qwen3vl 4b + 8b (#9275) --- requirements.txt | 2 +- src/llamafactory/extras/constants.py | 28 ++++++++++++++++++++++------ src/llamafactory/extras/misc.py | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 79ac7c6c..0c198ba4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # core deps transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10' -transformers>=4.49.0,<=4.57.0,!=4.52.0; python_version >= '3.10' +transformers>=4.49.0,<=4.57.1,!=4.52.0; python_version >= '3.10' datasets>=2.16.0,<=4.0.0 accelerate>=1.3.0,<=1.11.0 peft>=0.14.0,<=0.17.1 diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 62b60398..03587c53 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -3193,14 +3193,22 @@ register_model_group( register_model_group( models={ - "Qwen3-VL-235B-A22B-Instruct": { - DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct", - DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct", + "Qwen3-VL-4B-Instruct": { + DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Instruct", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Instruct", + }, + "Qwen3-VL-8B-Instruct": { + DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Instruct", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Instruct", }, "Qwen3-VL-30B-A3B-Instruct": { DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Instruct", DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Instruct", }, + "Qwen3-VL-235B-A22B-Instruct": { + DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct", + }, }, template="qwen3_vl_nothink", multimodal=True, @@ -3209,14 +3217,22 @@ register_model_group( register_model_group( models={ - "Qwen3-VL-235B-A22B-Thinking": { - DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking", - DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking", + "Qwen3-VL-4B-Thinking": { + DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Thinking", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Thinking", + }, + "Qwen3-VL-8B-Thinking": { + DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Thinking", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Thinking", }, "Qwen3-VL-30B-A3B-Thinking": { DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Thinking", DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Thinking", }, + "Qwen3-VL-235B-A22B-Thinking": { + DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking", + DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking", + }, }, template="qwen3_vl", multimodal=True, diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 81bcdd94..4f1d40a7 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -94,7 +94,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None: def check_dependencies() -> None: r"""Check the version of the required packages.""" - check_version("transformers>=4.49.0,<=4.57.0") + check_version("transformers>=4.49.0,<=4.57.1") check_version("datasets>=2.16.0,<=4.0.0") check_version("accelerate>=1.3.0,<=1.11.0") check_version("peft>=0.14.0,<=0.17.1")