[BugFix] Patch inductor memory plan logic (#26878)

Signed-off-by: Boyuan Feng <boyuan@meta.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Boyuan Feng
Date: 2025-10-15 05:51:45 -07:00
Committed by: GitHub
Commit: f57438338d
Parent: 5d598680e3
4 changed files with 108 additions and 6 deletions

@@ -20,6 +20,7 @@ from vllm.config import (
     set_current_vllm_config,
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
+from vllm.utils import is_torch_equal_or_newer

 # This import automatically registers `torch.ops.silly.attention`
 from .. import silly_attention  # noqa: F401
@@ -193,9 +194,8 @@ def run_model(
 @pytest.mark.parametrize("use_inductor_graph_partition", [False, True])
 def test_multi_graph_piecewise_compile(use_inductor_graph_partition: bool):
-    if use_inductor_graph_partition:
-        # FIXME(luka/boyuan): this currently fails
-        pytest.skip("Inductor graph partition not supported with multi-graph")
+    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
     outputs = []
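
Note on the version gate: the test now skips only when the installed PyTorch is older than 2.9, instead of skipping unconditionally. Below is a minimal, illustrative sketch of what a check like vllm.utils.is_torch_equal_or_newer does; the function name with the `_sketch` suffix and the use of `packaging` are assumptions for illustration, not vLLM's actual implementation.

    # Illustrative sketch only; vLLM's real helper lives in vllm.utils and
    # may differ in detail. It answers: "is the installed torch >= target?"
    import torch
    from packaging.version import Version

    def is_torch_equal_or_newer_sketch(target: str) -> bool:
        # Compare the installed torch version (e.g. "2.9.0.dev20250901+cu128")
        # against the target string (e.g. "2.9.0.dev").
        return Version(torch.__version__) >= Version(target)

With a gate like this, the inductor graph partition variant of the test runs on PyTorch 2.9+ builds and is skipped with a clear message elsewhere.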