[MISC] Fix misleading batch_size_capture_list when cuda_graph_sizes < 4 (#25829)

Signed-off-by: billishyahao <bill.he@amd.com>
Co-authored-by: Luka Govedic <ProExpertProg@users.noreply.github.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-11-11 16:50:52 +08:00 · 2025-10-01 20:39:45 +08:00
parent e8773e620f
commit a561b9832d
1 changed file with 6 additions and 3 deletions
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -580,9 +580,12 @@ class VllmConfig:
            not self.model_config.enforce_eager:
            cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
            if len(cuda_graph_sizes) == 1:
-                batch_size_capture_list = [1, 2, 4] + [
-                    i for i in range(8, cuda_graph_sizes[0] + 1, 8)
-                ]
+                max_graph_size = cuda_graph_sizes[0]
+                assert max_graph_size >= 1, "Maximum cudagraph size should be" \
+                                            " greater than or equal to 1."
+                batch_size_capture_list = [
+                    i for i in [1, 2, 4] if i <= max_graph_size
+                ] + list(range(8, max_graph_size + 1, 8))
            elif len(cuda_graph_sizes) > 1:
                batch_size_capture_list = sorted(cuda_graph_sizes)
            else: