Compare commits

...

1 Commit

Author SHA1 Message Date
e53382cc2e Sage Moore fixes for full cuda graph support for DeepEP+DeepGEMM LL
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
2025-06-24 11:21:52 -04:00

View File

@@ -75,7 +75,8 @@ class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
1, # MQA for the decode path
)
-if self.runner.full_cuda_graph:
+n = num_splits.size(0)
+if self.runner.full_cuda_graph and (n-1) <= self.runner.cudagraph_batch_sizes[-1]:
# First time around (CUDAGraph capture), allocate the static buffer
if self.cg_buf_tile_scheduler_metadata is None:
self.cg_buf_tile_scheduler_metadata = tile_scheduler_metadata