mirror of
https://github.com/vllm-project/vllm-ascend.git
synced 2025-10-20 13:43:53 +08:00
[bugfix] fix pipeline parallel for mla & sfa attention backend (#3459)
### What this PR does / why we need it? Fix pipeline parallel break for mla & sfa attention backend caused by a magic number in metadata builder. The error report: `AttributeError: 'PPMissingLayer' object has no attribute 'self_attn'` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR was tested with "mp" backend (PP2TP8 on an A3 node) as well as "ray" backend (PP2TP8 on two A2 nodes). - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
@@ -20,6 +20,7 @@ from tests.e2e.conftest import VllmRunner

 MODELS = [
     "Qwen/Qwen3-0.6B",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
 ]

 TENSOR_PARALLELS = [1]
@@ -314,9 +314,9 @@ class AscendMLAMetadataBuilder:

         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore
@@ -307,9 +307,9 @@ class AscendSFAMetadataBuilder:

         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore
Reference in New Issue
Block a user