mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
[BugFix] fix CUTLASS MLA full cudagraph (#23200)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
committed by
simon-mo
parent
aab549870d
commit
3a8708f60a
@@ -21,7 +21,7 @@ logger = init_logger(__name__)
|
|||||||
|
|
||||||
class CutlassMLAMetadataBuilder(MLACommonMetadataBuilder[MLACommonMetadata]):
|
class CutlassMLAMetadataBuilder(MLACommonMetadataBuilder[MLACommonMetadata]):
|
||||||
# enable full CUDA Graph support for decode-only capture
|
# enable full CUDA Graph support for decode-only capture
|
||||||
attn_cudagraph_support: ClassVar[
|
cudagraph_support: ClassVar[
|
||||||
AttentionCGSupport] = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
|
AttentionCGSupport] = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user