[Kernel] update comment for KV shape in unified triton attn (#18099)

Signed-off-by: haochengxia <xhc_1007@163.com>
2025-10-20 14:53:52 +08:00 · 2025-05-20 13:19:34 -05:00
parent e1f5a71ed7
commit 980a172474
1 changed file with 2 additions and 2 deletions
--- a/vllm/attention/ops/triton_unified_attention.py
+++ b/vllm/attention/ops/triton_unified_attention.py
@@ -31,8 +31,8 @@ def apply_softcap(S, x):
 def kernel_unified_attention_2d(
    output_ptr,  # [num_tokens, num_query_heads, head_size]
    query_ptr,  # [num_tokens, num_query_heads, head_size]
-    key_cache_ptr,  # [num_blks, num_kv_heads, head_size // x, blk_size, x]
-    value_cache_ptr,  # [num_blks, num_kv_heads, head_size, blk_size]
+    key_cache_ptr,  # [num_blks, blk_size, num_kv_heads, head_size]
+    value_cache_ptr,  # [num_blks, blk_size, num_kv_heads, head_size]
    block_tables_ptr,  # [num_seqs, max_num_blocks_per_seq]
    seq_lens_ptr,  # [num_seqs]
    alibi_slopes_ptr,  # [num_query_heads]