Log exception_stack_trace to dynamo_compile (#161096)

Note: Adding a unit test for this is tricky, as having errors in the specific unit test would cause test_utils.py to crash altogether.

Tested as follows:
1. Added x = 1/0 after guarded_code = compile_inner(code, one_graph, hooks, transform) in convert_frame.py
2. Printed exception_stack_trace and got: ['Traceback (most recent call last):\n  File "/data/users/jovian/pytorch/torch/_dynamo/convert_frame.py", line 1207, in _compile\n    x = 1/0\n        ~^~\nZeroDivisionError: division by zero\n']

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161096
Approved by: https://github.com/c00w
This commit is contained in:
Jovian Anthony Jaison
2025-08-22 03:29:11 +00:00
committed by PyTorch MergeBot
parent 31a41daff4
commit 2fdd4f918c
3 changed files with 37 additions and 0 deletions

View File

@@ -1272,6 +1272,7 @@ def _compile(
start_time_ns = time.time_ns()
fail_type: Optional[str] = None
fail_reason: Optional[str] = None
exception_stack_trace: Optional[list[str]] = None
fail_user_frame_filename: Optional[str] = None
fail_user_frame_lineno: Optional[int] = None
torch._dynamo.utils.ReinplaceCounters.clear()
@@ -1300,6 +1301,7 @@ def _compile(
# info here and add it to the metrics context below.
fail_type = type(e).__qualname__
fail_reason = str(e)
exception_stack_trace = [traceback.format_exc()]
exception_handler(e, code, frame, export=export)
# NB: this is the post-mutation exception
torch._logging.trace_structured(
@@ -1420,6 +1422,7 @@ def _compile(
),
"stack_trace": stack_trace,
"graph_node_shapes": str(graph_node_shapes),
"exception_stack_trace": exception_stack_trace,
}
# TODO: replace with CompileEventLogger.compilation_metrics
# There are some columns here not in PT2 Compile Events