[BugFix] Patch inductor memory plan logic (#26878)

Signed-off-by: Boyuan Feng <boyuan@meta.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Boyuan Feng
Date: 2025-10-15 05:51:45 -07:00
Committed by: GitHub
Commit: f57438338d
Parent: 5d598680e3
4 changed files with 108 additions and 6 deletions

@@ -20,6 +20,7 @@ from vllm.config import (
     set_current_vllm_config,
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
+from vllm.utils import is_torch_equal_or_newer

 # This import automatically registers `torch.ops.silly.attention`
 from .. import silly_attention  # noqa: F401
@@ -193,9 +194,8 @@ def run_model(
 @pytest.mark.parametrize("use_inductor_graph_partition", [False, True])
 def test_multi_graph_piecewise_compile(use_inductor_graph_partition: bool):
-    if use_inductor_graph_partition:
-        # FIXME(luka/boyuan): this currently fails
-        pytest.skip("Inductor graph partition not supported with multi-graph")
+    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
     outputs = []
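
Note on the version gate: the test now skips only when the installed PyTorch is older than 2.9, instead of skipping unconditionally. Below is a minimal, illustrative sketch of what a check like vllm.utils.is_torch_equal_or_newer does; the function name with the `_sketch` suffix and the use of `packaging` are assumptions for illustration, not vLLM's actual implementation.

    # Illustrative sketch only; vLLM's real helper lives in vllm.utils and
    # may differ in detail. It answers: "is the installed torch >= target?"
    import torch
    from packaging.version import Version

    def is_torch_equal_or_newer_sketch(target: str) -> bool:
        # Compare the installed torch version (e.g. "2.9.0.dev20250901+cu128")
        # against the target string (e.g. "2.9.0.dev").
        return Version(torch.__version__) >= Version(target)

With a gate like this, the inductor graph partition variant of the test runs on PyTorch 2.9+ builds and is skipped with a clear message elsewhere.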