Mirror of https://github.com/vllm-project/vllm.git
Synced 2025-10-20 14:53:52 +08:00
[Config] add "qwen" as a native eagle3 target supported model (#22333)
Signed-off-by: lechen <lecself@163.com>
Signed-off-by: LeChen <lecself@163.com>
@@ -525,6 +525,10 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
                                               trust_remote_code=True,
                                               speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
                                               tokenizer="meta-llama/Llama-3.1-8B-Instruct"),
+    "LlamaForCausalLMEagle3": _HfExamplesInfo("AngelSlim/Qwen3-8B_eagle3",  # noqa: E501
+                                              trust_remote_code=True,
+                                              speculative_model="AngelSlim/Qwen3-8B_eagle3",
+                                              tokenizer="Qwen/Qwen3-8B"),
     "EagleLlama4ForCausalLM": _HfExamplesInfo(
         "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct",
         trust_remote_code=True,
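With this example-registry entry in place, EAGLE3 speculative decoding can be exercised against a Qwen3 target end to end. A minimal offline-inference sketch (not part of this diff; it relies on vLLM's documented speculative_config dict argument, and the prompt and num_speculative_tokens value are arbitrary choices):

    from vllm import LLM, SamplingParams

    llm = LLM(
        model="Qwen/Qwen3-8B",                     # eagle3 target model
        speculative_config={
            "method": "eagle3",
            "model": "AngelSlim/Qwen3-8B_eagle3",  # draft model from this PR
            "num_speculative_tokens": 3,           # arbitrary draft length
        },
    )
    params = SamplingParams(temperature=0.0, max_tokens=64)
    outputs = llm.generate(["Briefly explain speculative decoding."], params)
    print(outputs[0].outputs[0].text)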
@@ -125,24 +125,27 @@ def test_ngram_correctness(
     cleanup_dist_env_and_memory()
 
 
-@pytest.mark.parametrize(
-    ["model_setup", "mm_enabled"], [
-        (("eagle", "meta-llama/Llama-3.1-8B-Instruct",
-          "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False),
-        (("eagle3", "meta-llama/Llama-3.1-8B-Instruct",
-          "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False),
-        pytest.param(
-            ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-             "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
-            False,
-            marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
-        pytest.param(
-            ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-             "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
-            True,
-            marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
-    ],
-    ids=["llama3_eagle", "llama3_eagle3", "llama4_eagle", "llama4_eagle_mm"])
+@pytest.mark.parametrize(["model_setup", "mm_enabled"], [
+    (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False),
+    (("eagle", "meta-llama/Llama-3.1-8B-Instruct",
+      "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False),
+    (("eagle3", "meta-llama/Llama-3.1-8B-Instruct",
+      "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False),
+    pytest.param(
+        ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+         "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
+        False,
+        marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
+    pytest.param(
+        ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+         "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
+        True,
+        marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
+],
+    ids=[
+        "qwen3_eagle3", "llama3_eagle", "llama3_eagle3",
+        "llama4_eagle", "llama4_eagle_mm"
+    ])
 @pytest.mark.parametrize("attn_backend",
                          get_attn_backend_list_based_on_platform())
 def test_eagle_correctness(
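For orientation (an inference from the tuples above, not stated in the diff): each model_setup appears to encode (method, target model, draft model, tensor-parallel size), and the ids list pairs with the cases positionally, so "qwen3_eagle3" names the newly added first entry. A hypothetical unpacking of that case:

    # Hypothetical field names; the values are copied verbatim from the new case.
    method, target_model, draft_model, tp_size = (
        "eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1)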
@@ -2852,13 +2852,7 @@ class SpeculativeConfig:
                 "speculative decoding is > 1, but got "
                 f"{self.disable_by_batch_size=}")
 
-        from vllm.transformers_utils.configs import SpeculatorsConfig
-
-        eagle3_target_supported = ["llama"]
-        if self.draft_model_config and isinstance(
-                self.draft_model_config.hf_config, SpeculatorsConfig):
-            eagle3_target_supported.append("qwen")
-
+        eagle3_target_supported = ["llama", "qwen"]
         if self.method == "eagle3" and self.target_model_config and not any(
                 supported_model in
                 self.target_model_config.hf_text_config.model_type
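The guard above is a plain substring match against the target's Hugging Face model_type, which is why a single "qwen" entry covers qwen2/qwen3 style identifiers. A standalone sketch of that predicate (simplified names, same logic as the diff):

    eagle3_target_supported = ["llama", "qwen"]

    def is_supported_target(model_type: str) -> bool:
        # True if any supported substring occurs in the HF model_type.
        return any(s in model_type for s in eagle3_target_supported)

    assert is_supported_target("llama")
    assert is_supported_target("qwen3")
    assert not is_supported_target("mixtral")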
@@ -259,6 +259,7 @@ _SPECULATIVE_DECODING_MODELS = {
     "EagleLlama4ForCausalLM": ("llama4_eagle", "EagleLlama4ForCausalLM"),
     "EagleMiniCPMForCausalLM": ("minicpm_eagle", "EagleMiniCPMForCausalLM"),
     "Eagle3LlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
+    "LlamaForCausalLMEagle3": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
     "DeepSeekMTPModel": ("deepseek_mtp", "DeepSeekMTP"),
     "Glm4MoeMTPModel": ("glm4_moe_mtp", "Glm4MoeMTP"),
     "MedusaModel": ("medusa", "Medusa"),
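The practical effect of the new registry row: a checkpoint declaring either architecture string loads the same implementation. A self-contained illustration using just the two rows in question:

    # Both naming conventions resolve to the same (module, class) pair.
    _SPECULATIVE_DECODING_MODELS = {
        "Eagle3LlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
        "LlamaForCausalLMEagle3": ("llama_eagle3", "Eagle3LlamaForCausalLM"),
    }
    assert (_SPECULATIVE_DECODING_MODELS["LlamaForCausalLMEagle3"]
            == _SPECULATIVE_DECODING_MODELS["Eagle3LlamaForCausalLM"])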
@@ -45,6 +45,7 @@ class EAGLEConfig(PretrainedConfig):
 
         # Eagle model name should follow naming convention of
         # LlamaForCausalLM -> EagleLlamaForCausalLM
+        # LlamaForCausalLM -> Eagle3LlamaForCausalLM / LlamaForCausalLMEagle3
         if method == "eagle":
             assert self.model is not None, \
                 "model should not be None when method is eagle"
@@ -56,8 +57,8 @@ class EAGLEConfig(PretrainedConfig):
             assert self.model is not None, \
                 "model should not be None when method is eagle3"
             kwargs["architectures"] = [
-                f"Eagle3{arch}" if not arch.startswith("Eagle3") \
-                    else arch for arch in self.model.architectures
+                arch if arch.startswith("Eagle3") or arch.endswith("Eagle3")
+                else f"Eagle3{arch}" for arch in self.model.architectures
             ]
         else:
             raise ValueError(f"Invalid method {method}. \
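The rewritten comprehension is needed because the old rule only skipped names already prefixed with "Eagle3"; a suffix-style name such as LlamaForCausalLMEagle3 would have been mangled into Eagle3LlamaForCausalLMEagle3. A standalone check of the new rule (hypothetical helper name, same expression as the diff):

    def normalize(architectures: list[str]) -> list[str]:
        # Keep names that already carry Eagle3 as a prefix or suffix;
        # prefix everything else, matching the comprehension above.
        return [
            arch if arch.startswith("Eagle3") or arch.endswith("Eagle3")
            else f"Eagle3{arch}" for arch in architectures
        ]

    assert normalize(["LlamaForCausalLM"]) == ["Eagle3LlamaForCausalLM"]
    assert normalize(["Eagle3LlamaForCausalLM"]) == ["Eagle3LlamaForCausalLM"]
    assert normalize(["LlamaForCausalLMEagle3"]) == ["LlamaForCausalLMEagle3"]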