Fullgraph graph capture with dynamo. (#159749)

Summary: Following up on Avik's doc (https://docs.google.com/document/d/11RW0Bbkp1QwFbEu8rCNW5d7wUFaEkxbL0uLyqcc2jTk/edit?tab=t.0): we are experimenting with a new API which utilizes torch.compile(fullgraph=True) and intend to use it to replace the old dynamo.export() API. This PR adds a prototype for the API described in the doc. Test Plan: test_misc -- -k test_fullgraph_capture. Rollback Plan: (none given). Differential Revision: D79534608. Pull Request resolved: https://github.com/pytorch/pytorch/pull/159749 Approved by: https://github.com/tugsbayasgalan
2025-10-20 21:14:14 +08:00 · 2025-08-12 22:06:18 +00:00
parent 101276f81b
commit 16d15445f8
3 changed files with 138 additions and 4 deletions
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@ -16,11 +16,13 @@ import logging
 import math
 import operator
 import os
+import pickle
 import random
 import sys
 import tempfile
 import threading
 import traceback
+import types
 import typing
 import unittest
 import unittest.mock as mock
@ -8520,6 +8522,50 @@ utils_device.CURRENT_DEVICE == None""".split("\n"):
        self.assertEqual(seen_frames[0].name, "fn")
        self.assertEqual(seen_frames[0].line, "r, r2 = uwu_inline_me(x, y, z)")

+    def test_fullgraph_capture(self):
+        # End-to-end check of the prototype fullgraph_capture() API: run a
+        # function on two differently shaped inputs, collect the captured
+        # artifacts, then rebuild a guard manager and an exported program from
+        # the first captured entry and compare them against eager execution.
+        def foo(x):
+            return x + x.shape[0]
+
+        compiled_foo = torch._dynamo.eval_frame.fullgraph_capture(foo)
+        # Call once with a 2-D input and once with a 1-D input before
+        # collecting the artifacts.
+        compiled_foo(torch.randn(3, 2))
+        compiled_foo(torch.randn(4))
+        artifacts = compiled_foo.get_artifacts()
+
+        guarded_codes = artifacts.dynamo_artifacts.guarded_codes
+        backend_ids = list(artifacts.backend_inputs.keys())
+        gms = [b.graph_module for b in artifacts.backend_inputs.values()]
+
+        def _convert_to_ep_demo(code, backend_id, gm, args):
+            # Demo helper: returns (guard_manager, exported_program) built from
+            # one guarded code entry and one captured graph module.
+            # Inject compiled function as the original gm, i.e. bind `gm` under
+            # `backend_id` in a shallow copy of this module's globals.
+            new_globals = copy.copy(globals())
+            new_globals[backend_id] = gm
+            # Minimal boilerplate to set up a callable.
+            SerializedCode = type(code.dynamo_code)
+            dynamo_bytecode = SerializedCode.to_code_object(code.dynamo_code)
+            # guards_state comes from the capture performed above in this same
+            # test, so unpickling it here does not touch external data.
+            guards_state = pickle.loads(code.guards_state)
+            # Rebuild the guards in "load" mode against foo's code object, using
+            # the patched globals as the runtime global scope.
+            guard_manager = torch._dynamo.guards.CheckFunctionManager(
+                foo.__code__,
+                guards_state.output_graph,
+                guards_serialization_mode="load",
+                shape_code_parts=guards_state.shape_code_parts,
+                runtime_global_scope=new_globals,
+            ).guard_manager
+
+            # Wrap the dynamo bytecode in an nn.Module so it can be handed to
+            # torch.export.export().
+            class ModuleForExport(torch.nn.Module):
+                def forward(self, x):
+                    return types.FunctionType(dynamo_bytecode, new_globals)(x)
+
+            m = ModuleForExport()
+            return guard_manager, torch.export.export(m, args)
+
+        # NOTE(review): assumes index 0 of guarded_codes, backend_ids and gms
+        # all refer to the first call (the (3, 2) input) -- confirm ordering.
+        guards0, ep0 = _convert_to_ep_demo(
+            guarded_codes[0], backend_ids[0], gms[0], (torch.randn(3, 2),)
+        )
+        # The rebuilt guards must accept a (3, 2) input and reject a (4,) input.
+        self.assertTrue(guards0.check({"x": torch.randn(3, 2)}))
+        self.assertFalse(guards0.check({"x": torch.randn(4)}))
+        # The exported program must match eager `foo` on the same input.
+        input0 = torch.randn(3, 2)
+        self.assertEqual(ep0.module()(input0), foo(input0))
+
    def test_torch_guards_stack_frame_register_inlining_deep(self):
        x = torch.tensor([0.5, 0.5])
        y = torch.tensor([0.75, 0.75, 0.75, 0.75])
--- a/torch/_dynamo/eval_frame.py
+++ b/torch/_dynamo/eval_frame.py
@ -113,7 +113,7 @@ from .utils import (
 if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

-    from torch._dynamo.package import CompilePackage
+    from torch._dynamo.package import CompilePackage, DynamoCaptureOutput
    from torch._dynamo.repro.after_dynamo import WrapBackendDebug
    from torch._subclasses import fake_tensor
    from torch.fx.node import Argument, Node, Target
@ -2288,3 +2288,83 @@ def skip_code(code: types.CodeType) -> None:
    set_code_exec_strategy(
        code, FrameExecStrategy(FrameAction.SKIP, FrameAction.DEFAULT)
    )
+
+
+@dataclass
+class BackendInput:
+    # Record of what the capturing backend in fullgraph_capture() received for
+    # one graph; one instance is stored per backend id.
+
+    # The FX graph module passed to the backend.
+    graph_module: torch.fx.GraphModule
+    # The example inputs passed to the backend alongside the graph module.
+    example_inputs: tuple[Any, ...]
+    # The fake mode read from the active TracingContext at capture time.
+    fake_mode: torch._subclasses.fake_tensor.FakeTensorMode
+
+
+@dataclass
+class CaptureOutput:
+    """
+    Core data structure that contains all the information dynamo generates
+    from fullgraph=True. Ideally, this should be the "return" type if dynamo
+    had a standard API to return compilation artifacts.
+    """
+
+    # Dynamo-side artifacts (e.g. the guarded code entries).
+    dynamo_artifacts: DynamoCaptureOutput
+    # Backend-side inputs, keyed by backend id.
+    backend_inputs: dict[str, BackendInput]
+
+
+def fullgraph_capture(model: Callable[..., Any]) -> Callable[..., Any]:
+    """
+    A helper function which wraps a model and returns a callable like optimize().
+    The callable can be called with normal inputs like torch.compile()-ed functions
+    and the user can dump dynamo compilation artifacts by calling
+    `get_artifacts()` on it.
+
+    The CaptureOutput is separated into two parts:
+    1. Dynamo specific information from DynamoCaptureOutput, which includes:
+        - guards
+        - generated bytecode
+        - python source information
+    2. Backend specific information (indexed by unique backend id) such as:
+        - fx graph
+        - example inputs
+
+    Args:
+        model: the callable to capture.
+
+    Returns:
+        A wrapper around the optimized `model` (created with functools.wraps)
+        that additionally exposes a `get_artifacts()` attribute returning a
+        CaptureOutput. `get_artifacts()` asserts that the package cache entry
+        holds exactly one code entry.
+
+    Example:
+        def fn(*args):
+            ...
+
+        compiled_fn = fullgraph_capture(fn)
+        compiled_fn(args)
+        compiled_fn(another_args)
+        artifacts = compiled_fn.get_artifacts()
+    """
+    # Imported locally; at module level CompilePackage is only imported under
+    # TYPE_CHECKING.
+    from torch._dynamo.package import CompilePackage
+
+    package = CompilePackage(model)
+
+    # Filled in by `_backend` below, one entry per backend id.
+    backend_inputs: dict[str, BackendInput] = {}
+
+    def _backend(
+        gm: torch.fx.GraphModule, example_inputs: tuple[Any, ...]
+    ) -> torch.fx.GraphModule:
+        # Recording backend: stores what it was given and returns the graph
+        # module unchanged.
+        from torch._guards import TracingContext
+
+        fake_mode = TracingContext.get().fake_mode
+        assert fake_mode is not None
+        backend_id = gm._backend_id
+        assert isinstance(backend_id, str)
+        backend_inputs[backend_id] = BackendInput(gm, example_inputs, fake_mode)
+        return gm
+
+    # TODO For now we use eval_frame to give us the frame. This can be simplified to
+    #      a manual frame creation helper.
+    optimized_model = optimize(nopython=True, backend=_backend, package=package)(model)
+
+    @functools.wraps(model)
+    def capture_context(*args: Any, **kwargs: Any) -> Any:
+        return optimized_model(*args, **kwargs)
+
+    def get_artifacts() -> CaptureOutput:
+        # Note: `backend_inputs` is handed out as the same dict object that
+        # `_backend` populates, not a copy.
+        cache_entry = package.cache_entry()
+        assert len(cache_entry.codes) == 1
+        return CaptureOutput(
+            dynamo_artifacts=cache_entry.codes[0], backend_inputs=backend_inputs
+        )
+
+    capture_context.get_artifacts = get_artifacts  # type: ignore[attr-defined]
+    return capture_context
--- a/torch/_dynamo/package.py
+++ b/torch/_dynamo/package.py
@ -112,7 +112,17 @@ class InlinedSource:


@dataclasses.dataclass
-class _DynamoCodeCacheEntry:
+class DynamoCaptureOutput:
+    """
+    Core information generated from Dynamo for fullgraph=True.
+    """
+
+    # NOTE(review): these two fields were moved here from
+    # _DynamoCodeCacheEntry, which now inherits from this class. Dataclass
+    # fields of a base class come first in the subclass's generated __init__,
+    # so the positional field order of _DynamoCodeCacheEntry changes -- confirm
+    # that callers construct it with keyword arguments.
+
+    # Guarded code entries captured by dynamo.
+    guarded_codes: list[_GuardedCodeCacheEntry]
+    # Backend ids associated with this capture.
+    backend_ids: list[_BackendId]
+
+
+@dataclasses.dataclass
+class _DynamoCodeCacheEntry(DynamoCaptureOutput):
    """
    Contains the serializable information associated with a single code object
    in dynamo. To restore an execution of compiled code, we will need the following
@ -135,9 +145,7 @@ class _DynamoCodeCacheEntry:
    python_code: SerializedCode
    python_module: str
    function_names: list[_FunctionId]
-    guarded_codes: list[_GuardedCodeCacheEntry]
    import_sources: dict[str, str]
-    backend_ids: list[_BackendId]
    code_source: Optional[str]
    install_to_global: bool
    has_compile_id: bool = False