Remove redundant all gather + split (#23441)

Co-authored-by: Chenxi Yang <cxyang@meta.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
2025-10-20 23:03:52 +08:00 · 2025-09-10 23:45:07 -07:00
parent 9bd831f501
commit d13360183a
1 changed file with 0 additions and 13 deletions
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -272,23 +272,10 @@ class Glm4vVisionAttention(nn.Module):
    def split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]:
        # [s, b, 3 * head * head_dim]
        seq_len, bs, _ = qkv.shape
        if self.tp_size > 1:
            qkv = all_gather_interleave(qkv, self.qkv.hidden_size,
                                        self.tp_size)
        # [s, b, 3 * head * head_dim] -> 3 * [s, b, head * head_dim]
        q, k, v = qkv.chunk(3, dim=2)
        # 3 * [s, b, head * head_dim]
        if self.tp_size > 1:
            splitter = partial(
                dist_utils.split_tensor_along_last_dim,
                num_partitions=self.tp_size,
            )
            q = splitter(q)[self.tp_rank]
            k = splitter(k)[self.tp_rank]
            v = splitter(v)[self.tp_rank]
        # 3 * [s, b, head * head_dim] -> 3 * [s, b, head, head_dim]
        new_shape = (
            seq_len,