Mirror of https://github.com/vllm-project/vllm.git, synced 2025-10-20 23:03:52 +08:00
[Bugfix] Triton FA function takes no keyword arguments (#16902)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
@@ -1091,7 +1091,14 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                 q,
                 k,
                 maybe_padded_v,
-                **kwargs,
+                None,  # output
+                kwargs["cu_seqlens_q"],
+                kwargs["cu_seqlens_k"],
+                kwargs["max_seqlen_q"],
+                kwargs["max_seqlen_k"],
+                kwargs["causal"],
+                softmax_scale,
+                None,  # bias
             )
         if is_vllm_fa:
             attn_out = self.flash_attn_varlen_func(
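For context, below is a minimal, self-contained sketch of why the old keyword-argument call breaks and what the positional call looks like. The wrapper class, parameter names, and parameter order are assumptions inferred from the positional arguments in the diff, not the actual vLLM Triton kernel; the point it illustrates is that an entry point exposed as torch.autograd.Function.apply (a common pattern for Triton attention kernels) rejects keyword arguments, so every argument has to be passed positionally.

import torch


class _FakeTritonAttention(torch.autograd.Function):
    # Hypothetical stand-in for the Triton flash-attention wrapper; the
    # parameter order mirrors the positional arguments in the diff above.
    @staticmethod
    def forward(ctx, q, k, v, o, cu_seqlens_q, cu_seqlens_k,
                max_seqlen_q, max_seqlen_k, causal, sm_scale, bias):
        # Placeholder math; a real wrapper would launch a Triton kernel here.
        return q * sm_scale


triton_fa_func = _FakeTritonAttention.apply

q = k = maybe_padded_v = torch.randn(4, 8)
softmax_scale = 0.125
kwargs = {
    "cu_seqlens_q": torch.tensor([0, 4]),
    "cu_seqlens_k": torch.tensor([0, 4]),
    "max_seqlen_q": 4,
    "max_seqlen_k": 4,
    "causal": True,
}

# Old call: Function.apply takes no keyword arguments, so this typically
# raises TypeError ("apply() takes no keyword arguments").
try:
    triton_fa_func(q, k, maybe_padded_v, **kwargs)
except TypeError as exc:
    print(f"keyword call fails: {exc}")

# Fixed call: every argument is passed positionally, matching the diff.
attn_out = triton_fa_func(
    q,
    k,
    maybe_padded_v,
    None,  # output
    kwargs["cu_seqlens_q"],
    kwargs["cu_seqlens_k"],
    kwargs["max_seqlen_q"],
    kwargs["max_seqlen_k"],
    kwargs["causal"],
    softmax_scale,
    None,  # bias
)
print(attn_out.shape)  # torch.Size([4, 8])

Passing None for the output and bias slots preserves the positional order, which is why the fixed call spells out every argument even when it carries no value.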