[bugfix] fix pipeline parallel for mla & sfa attention backend (#3459)

### What this PR does / why we need it?
Fix a pipeline-parallel break in the MLA & SFA attention backends caused by a
magic number (hard-coded layer index) in the metadata builders. The error report:
`AttributeError: 'PPMissingLayer' object has no attribute 'self_attn'`
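
For context, a minimal sketch (not the actual builder code; `fetch_rope_caches` is an illustrative helper) of why indexing `model.model.layers[0]` fails under pipeline parallelism: layers that do not belong to the current PP rank are replaced with `PPMissingLayer` placeholders, so on any rank after the first stage `layers[0]` is a placeholder without `self_attn`, while `model.model.start_layer` points at the first layer the rank actually owns.

```python
# Minimal sketch of the failure mode and the fix; `fetch_rope_caches` is an
# illustrative helper, not code from this repo.
def fetch_rope_caches(model):
    layers = model.model.layers            # PPMissingLayer placeholders + local layers
    first_local = model.model.start_layer  # index of the first layer owned by this PP rank

    # Buggy: on PP ranks after the first stage, layers[0] is a PPMissingLayer,
    # which has no `self_attn`, hence the AttributeError above.
    # attn = layers[0].self_attn

    # Fixed: use the first layer this rank actually holds.
    attn = layers[first_local].self_attn
    return attn.rotary_emb.cos_cached, attn.rotary_emb.sin_cached
```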

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
This PR was tested with the "mp" backend (PP2TP8 on a single A3 node) as well as
the "ray" backend (PP2TP8 across two A2 nodes).

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: linfeng-yuan <1102311262@qq.com>
Author: linfeng-yuan
Date: 2025-10-15 17:13:27 +08:00
Commit: 099255e933 (parent: 5a3082cd15)
3 changed files with 5 additions and 4 deletions


@@ -20,6 +20,7 @@ from tests.e2e.conftest import VllmRunner
 MODELS = [
     "Qwen/Qwen3-0.6B",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
 ]
 TENSOR_PARALLELS = [1]
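
The added model entry extends the e2e pipeline-parallel test matrix so the MLA path is covered. A rough sketch of how such a parametrized test typically consumes these lists (prompts, runner arguments, and the test body are assumptions, not this repo's exact code):

```python
import pytest
from tests.e2e.conftest import VllmRunner

# MODELS / TENSOR_PARALLELS are the module-level lists shown in the diff above.
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
def test_models_pipeline_parallel(model: str, tp_size: int) -> None:
    # Arguments below are illustrative; the real runner may configure more options.
    with VllmRunner(model,
                    tensor_parallel_size=tp_size,
                    pipeline_parallel_size=2) as runner:
        runner.generate_greedy(["Hello, my name is"], 32)
```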


@@ -314,9 +314,9 @@ class AscendMLAMetadataBuilder:
         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype: # type: ignore
             self.cos_cache = self.cos_cache.to( # type: ignore
                 self.model_config.dtype) # type: ignore


@@ -307,9 +307,9 @@ class AscendSFAMetadataBuilder:
         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype: # type: ignore
             self.cos_cache = self.cos_cache.to( # type: ignore
                 self.model_config.dtype) # type: ignore