From 1037f633113a9e08f9fdf11339703cf9a28c9291 Mon Sep 17 00:00:00 2001
From: Yaowei Zheng <hiyouga@buaa.edu.cn>
Date: Wed, 15 Oct 2025 15:00:36 +0800
Subject: [PATCH] [model] add qwen3vl 4b + 8b (#9275)

---
 requirements.txt                     |  2 +-
 src/llamafactory/extras/constants.py | 28 ++++++++++++++++++++++------
 src/llamafactory/extras/misc.py      |  2 +-
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 79ac7c6c..0c198ba4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 # core deps
 transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
-transformers>=4.49.0,<=4.57.0,!=4.52.0; python_version >= '3.10'
+transformers>=4.49.0,<=4.57.1,!=4.52.0; python_version >= '3.10'
 datasets>=2.16.0,<=4.0.0
 accelerate>=1.3.0,<=1.11.0
 peft>=0.14.0,<=0.17.1
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 62b60398..03587c53 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -3193,14 +3193,22 @@ register_model_group(
 
 register_model_group(
     models={
-        "Qwen3-VL-235B-A22B-Instruct": {
-            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct",
-            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct",
+        "Qwen3-VL-4B-Instruct": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Instruct",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Instruct",
+        },
+        "Qwen3-VL-8B-Instruct": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Instruct",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Instruct",
         },
         "Qwen3-VL-30B-A3B-Instruct": {
             DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Instruct",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Instruct",
         },
+        "Qwen3-VL-235B-A22B-Instruct": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct",
+        },
     },
     template="qwen3_vl_nothink",
     multimodal=True,
@@ -3209,14 +3217,22 @@ register_model_group(
 
 register_model_group(
     models={
-        "Qwen3-VL-235B-A22B-Thinking": {
-            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking",
-            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking",
+        "Qwen3-VL-4B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Thinking",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Thinking",
+        },
+        "Qwen3-VL-8B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Thinking",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Thinking",
         },
         "Qwen3-VL-30B-A3B-Thinking": {
             DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Thinking",
             DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Thinking",
         },
+        "Qwen3-VL-235B-A22B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking",
+        },
     },
     template="qwen3_vl",
     multimodal=True,
diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py
index 81bcdd94..4f1d40a7 100644
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -94,7 +94,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 
 def check_dependencies() -> None:
     r"""Check the version of the required packages."""
-    check_version("transformers>=4.49.0,<=4.57.0")
+    check_version("transformers>=4.49.0,<=4.57.1")
     check_version("datasets>=2.16.0,<=4.0.0")
     check_version("accelerate>=1.3.0,<=1.11.0")
     check_version("peft>=0.14.0,<=0.17.1")