mirror of
https://github.com/vllm-project/vllm-ascend.git
synced 2025-10-20 13:43:53 +08:00
[bugfix] fix pipeline parallel for mla & sfa attention backend (#3459)
### What this PR does / why we need it? Fix pipeline parallel break for mla & sfa attention backend caused by a magic number in metadata builder. The error report: `AttributeError: 'PPMissingLayer' object has no attribute 'self_attn'` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR was tested with "mp" backend (PP2TP8 on an A3 node) as well as "ray" backend (PP2TP8 on two A2 nodes). - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
@@ -20,6 +20,7 @@ from tests.e2e.conftest import VllmRunner

 MODELS = [
     "Qwen/Qwen3-0.6B",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
 ]

 TENSOR_PARALLELS = [1]
@@ -314,9 +314,9 @@ class AscendMLAMetadataBuilder:

         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore
@@ -307,9 +307,9 @@ class AscendSFAMetadataBuilder:

         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore
Reference in New Issue
Block a user