Mirror of https://github.com/vllm-project/vllm.git
skip fusedmoe layer for start_load_kv (#21378)
Signed-off-by: calvin chen <wen.chen@dynamia.ai>
@@ -192,8 +192,16 @@ class P2pNcclConnector(KVConnectorBase_V1):
         # Load the KV for each request each layer
         for request in metadata.requests:
             for layer_name in forward_context.no_compile_layers:
-                attn_layer = forward_context.no_compile_layers[layer_name]
-                kv_cache_layer = attn_layer.kv_cache[ \
+                layer = forward_context.no_compile_layers[layer_name]
+
+                # Only process layers that have kv_cache
+                # attribute (attention layers) Skip non-attention
+                # layers like FusedMoE
+                kv_cache = getattr(layer, 'kv_cache', None)
+                if kv_cache is None:
+                    continue
+
+                kv_cache_layer = kv_cache[ \
                     forward_context.virtual_engine]
 
                 kv_cache = self.p2p_nccl_engine.recv_tensor(
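For context, below is a minimal, self-contained sketch of the skip pattern this commit introduces: layers that lack a kv_cache attribute (for example FusedMoE layers) are passed over instead of triggering an AttributeError. The DummyAttention and DummyFusedMoE classes, the layer names, and the placeholder tensors are illustrative stand-ins, not vLLM's actual implementations.

# Minimal sketch of the getattr-based skip used in the commit above.
# All names here are hypothetical stand-ins for illustration only.
class DummyAttention:
    def __init__(self):
        # Attention layers carry one KV-cache entry per virtual engine.
        self.kv_cache = ["kv-tensor-for-virtual-engine-0"]

class DummyFusedMoE:
    # MoE layers have no kv_cache attribute, so they must be skipped.
    pass

no_compile_layers = {
    "model.layers.0.self_attn": DummyAttention(),
    "model.layers.0.mlp": DummyFusedMoE(),
}
virtual_engine = 0

for layer_name, layer in no_compile_layers.items():
    # getattr with a None default lets non-attention layers fall through
    # instead of raising AttributeError, mirroring the fix in the diff.
    kv_cache = getattr(layer, "kv_cache", None)
    if kv_cache is None:
        print(f"skipping {layer_name} (no kv_cache)")
        continue
    kv_cache_layer = kv_cache[virtual_engine]
    print(f"loading KV into {layer_name}: {kv_cache_layer}")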