Mirror of https://github.com/vllm-project/vllm.git
test_attention compat with coming xformers change (#20487)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
commit c5ebe040ac
parent 8d763cb891
committed by GitHub
@@ -450,7 +450,8 @@ def test_multi_query_kv_attention(
             start += seq_len
         # xformers.AttentionBias to Tensor for use in reference impl.
         alibi_bias = [
-            b.materialize(b.shape, device=device).squeeze() for b in attn_bias
+            b.materialize((1, num_query_heads, i, i), device=device).squeeze()
+            for b, i in zip(attn_bias, seq_lens)
         ]
     else:
         attn_bias = BlockDiagonalCausalMask.from_seqlens(seq_lens)
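The diff stops calling materialize with the bias object's own shape attribute and instead passes the expected (1, num_query_heads, seq_len, seq_len) shape explicitly for each sequence. Below is a minimal, self-contained sketch of that pattern outside the vllm test; the head count, sequence lengths, and ALiBi slope values are invented for illustration and are not the test's actual setup.

# Minimal sketch of the explicit-shape materialization pattern the diff adopts.
# Assumed/illustrative: num_query_heads, seq_lens, and the slope values below.
import torch
from xformers.ops.fmha.attn_bias import LowerTriangularMaskWithTensorBias

num_query_heads = 4
seq_lens = [7, 12]
device = "cpu"

attn_bias = []
for seq_len in seq_lens:
    # Build a per-sequence ALiBi-style bias of shape (1, num_heads, seq_len, seq_len).
    slopes = torch.rand(num_query_heads, device=device)
    positions = torch.arange(seq_len, dtype=torch.float32, device=device)
    relative = positions[None, :] - positions[:, None]
    bias = relative[None, None, :, :] * slopes[None, :, None, None]
    attn_bias.append(LowerTriangularMaskWithTensorBias(bias))

# Old pattern (removed by the diff): b.materialize(b.shape, device=device)
# New pattern: pass the target shape explicitly, using each sequence's length.
alibi_bias = [
    b.materialize((1, num_query_heads, i, i), device=device).squeeze()
    for b, i in zip(attn_bias, seq_lens)
]
print([t.shape for t in alibi_bias])

Passing the shape explicitly keeps the reference implementation independent of whatever shape metadata the xformers bias object exposes, which is what makes the test compatible with the upcoming xformers change named in the commit title.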