[Bugfix] Triton FA function takes no keyword arguments (#16902)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
This commit is contained in:
vllmellm
2025-04-23 12:35:24 +08:00
committed by GitHub
parent eb8ef4224d
commit 047797ef90

View File

@@ -1091,7 +1091,14 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
q,
k,
maybe_padded_v,
**kwargs,
None, # output
kwargs["cu_seqlens_q"],
kwargs["cu_seqlens_k"],
kwargs["max_seqlen_q"],
kwargs["max_seqlen_k"],
kwargs["causal"],
softmax_scale,
None, # bias
)
if is_vllm_fa:
attn_out = self.flash_attn_varlen_func(