diff --git a/tests/e2e/multicard/test_pipeline_parallel.py b/tests/e2e/multicard/test_pipeline_parallel.py
index 03774db14..fa21fe8d7 100644
--- a/tests/e2e/multicard/test_pipeline_parallel.py
+++ b/tests/e2e/multicard/test_pipeline_parallel.py
@@ -20,6 +20,7 @@ from tests.e2e.conftest import VllmRunner
 
 MODELS = [
     "Qwen/Qwen3-0.6B",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
 ]
 
 TENSOR_PARALLELS = [1]
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index c8379b7e7..819edcbb9 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -314,9 +314,9 @@ class AscendMLAMetadataBuilder:
 
         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore
diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index 55282c844..edbd7cc5d 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -307,9 +307,9 @@ class AscendSFAMetadataBuilder:
 
         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore