Mirror of https://github.com/vllm-project/vllm.git, synced 2025-10-20 23:03:52 +08:00
[Bugfix] Triton FA function takes no keyword arguments (#16902)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
@@ -1091,7 +1091,14 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                 q,
                 k,
                 maybe_padded_v,
-                **kwargs,
+                None,  # output
+                kwargs["cu_seqlens_q"],
+                kwargs["cu_seqlens_k"],
+                kwargs["max_seqlen_q"],
+                kwargs["max_seqlen_k"],
+                kwargs["causal"],
+                softmax_scale,
+                None,  # bias
             )
         if is_vllm_fa:
             attn_out = self.flash_attn_varlen_func(
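For context, the sketch below illustrates the failure mode the commit title describes and the shape of the fix: when the Triton flash-attention entry point accepts only positional arguments, a **kwargs call raises TypeError, so the caller must spell every argument out in the order the kernel expects. The function fake_triton_attention and its parameter list are illustrative stand-ins, not the actual vLLM/ROCm Triton kernel signature.

# Minimal sketch, assuming a Triton-style attention wrapper whose
# parameters are positional-only; the names below are hypothetical.
def fake_triton_attention(q, k, v, o, cu_seqlens_q, cu_seqlens_k,
                          max_seqlen_q, max_seqlen_k, causal, sm_scale,
                          bias, /):
    # Placeholder body; a real kernel would launch a Triton grid here.
    return q

q = k = v = object()          # stand-ins for tensors
softmax_scale = 1.0
kwargs = {
    "cu_seqlens_q": None,
    "cu_seqlens_k": None,
    "max_seqlen_q": 128,
    "max_seqlen_k": 128,
    "causal": True,
}

# Before the fix: keyword arguments cannot bind to positional-only
# parameters, so this call raises TypeError.
try:
    fake_triton_attention(q, k, v, **kwargs)
except TypeError as exc:
    print(f"keyword call rejected: {exc}")

# After the fix: pass everything positionally, in kernel order.
attn_out = fake_triton_attention(
    q,
    k,
    v,
    None,                     # output
    kwargs["cu_seqlens_q"],
    kwargs["cu_seqlens_k"],
    kwargs["max_seqlen_q"],
    kwargs["max_seqlen_k"],
    kwargs["causal"],
    softmax_scale,
    None,                     # bias
)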