diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py index 6e1e5d6533..024f382019 100644 --- a/vllm/v1/attention/backends/mla/cutlass_mla.py +++ b/vllm/v1/attention/backends/mla/cutlass_mla.py @@ -21,7 +21,7 @@ logger = init_logger(__name__) class CutlassMLAMetadataBuilder(MLACommonMetadataBuilder[MLACommonMetadata]): # enable full CUDA Graph support for decode-only capture - attn_cudagraph_support: ClassVar[ + cudagraph_support: ClassVar[ AttentionCGSupport] = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE