[model] add qwen3vl 4b + 8b (#9275)

2025-10-20 12:54:18 +08:00 · 2025-10-15 15:00:36 +08:00
parent c867e28093
commit 1037f63311
3 changed files with 24 additions and 8 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 # core deps
 transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
-transformers>=4.49.0,<=4.57.0,!=4.52.0; python_version >= '3.10'
+transformers>=4.49.0,<=4.57.1,!=4.52.0; python_version >= '3.10'
 datasets>=2.16.0,<=4.0.0
 accelerate>=1.3.0,<=1.11.0
 peft>=0.14.0,<=0.17.1
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -3193,14 +3193,22 @@ register_model_group(
 register_model_group(
    models={
-        "Qwen3-VL-235B-A22B-Instruct": {
+        "Qwen3-VL-4B-Instruct": {
-            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct",
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Instruct",
-            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Instruct",
        },
        "Qwen3-VL-8B-Instruct": {
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Instruct",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Instruct",
        },
        "Qwen3-VL-30B-A3B-Instruct": {
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Instruct",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Instruct",
        },
        "Qwen3-VL-235B-A22B-Instruct": {
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Instruct",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Instruct",
        },
    },
    template="qwen3_vl_nothink",
    multimodal=True,
@@ -3209,14 +3217,22 @@ register_model_group(
 register_model_group(
    models={
-        "Qwen3-VL-235B-A22B-Thinking": {
+        "Qwen3-VL-4B-Thinking": {
-            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking",
+            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-4B-Thinking",
-            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-4B-Thinking",
        },
        "Qwen3-VL-8B-Thinking": {
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-8B-Thinking",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-8B-Thinking",
        },
        "Qwen3-VL-30B-A3B-Thinking": {
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-30B-A3B-Thinking",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-30B-A3B-Thinking",
        },
        "Qwen3-VL-235B-A22B-Thinking": {
            DownloadSource.DEFAULT: "Qwen/Qwen3-VL-235B-A22B-Thinking",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3-VL-235B-A22B-Thinking",
        },
    },
    template="qwen3_vl",
    multimodal=True,
--- a/src/llamafactory/extras/misc.py
+++ b/src/llamafactory/extras/misc.py
@@ -94,7 +94,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 def check_dependencies() -> None:
    r"""Check the version of the required packages."""
-    check_version("transformers>=4.49.0,<=4.57.0")
+    check_version("transformers>=4.49.0,<=4.57.1")
    check_version("datasets>=2.16.0,<=4.0.0")
    check_version("accelerate>=1.3.0,<=1.11.0")
    check_version("peft>=0.14.0,<=0.17.1")