[Dynamo] Log innermost user frame filename & lineno for better error aggregation (#115899)

CompilationMetrics example:
```
frame_key='1',
co_name='fn',
co_filename='/data/users/ybliang/debug/debug1.py',
co_firstlineno=58,
cache_size=0,
accumulated_cache_size=0,
guard_count=None,
graph_op_count=None,
graph_node_count=None,
graph_input_count=None,
entire_frame_compile_time_s=None,
backend_compile_time_s=None,
fail_type="<class 'torch._dynamo.exc.Unsupported'>",
fail_reason='custome dict init with args/kwargs unimplemented',
fail_user_frame_filename='/data/users/ybliang/debug/debug1.py',
fail_user_frame_lineno=61
```
where:
* ```fail_type``` and ```fail_reason``` are the type and message of the exception raised inside Dynamo.
* ```fail_user_frame_filename``` and ```fail_user_frame_lineno``` are the filename and line number of the innermost user-code frame that triggered the exception.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/115899
Approved by: https://github.com/davidberard98, https://github.com/ydwu4
This commit is contained in:
Yanbo Liang
2023-12-15 08:24:52 +00:00
committed by PyTorch MergeBot
parent 4edc921857
commit b4d6443bcf
3 changed files with 21 additions and 1 deletions

View File

@ -453,7 +453,10 @@ def _compile(
output: Optional[OutputGraph] = None
# This is shared across restarts
mutated_closure_cell_contents: Set[str] = set()
fail_type: Optional[str] = None
fail_reason: Optional[str] = None
fail_user_frame_filename: Optional[str] = None
fail_user_frame_lineno: Optional[int] = None
speculation_log = SpeculationLog()
@preserve_global_state
@ -610,12 +613,20 @@ def _compile(
UncapturedHigherOrderOpError,
BisectValidationException,
) as e:
fail_type = str(type(e))
fail_reason = str(e)
exception_handler(e, code, frame, export=export)
if e.innermost_user_frame_summary is not None: # type: ignore[union-attr]
fail_user_frame_filename = e.innermost_user_frame_summary.filename # type: ignore[union-attr]
fail_user_frame_lineno = e.innermost_user_frame_summary.lineno # type: ignore[union-attr]
raise
except Exception as e:
fail_type = str(type(e))
fail_reason = str(e)
exception_handler(e, code, frame, export=export)
if e.innermost_user_frame_summary is not None: # type: ignore[attr-defined]
fail_user_frame_filename = e.innermost_user_frame_summary.filename # type: ignore[attr-defined]
fail_user_frame_lineno = e.innermost_user_frame_summary.lineno # type: ignore[attr-defined]
raise InternalTorchDynamoError(str(e)).with_traceback(
e.__traceback__
) from None
@ -670,7 +681,10 @@ def _compile(
graph_input_count,
entire_frame_compile_time,
backend_compile_time,
fail_type,
fail_reason,
fail_user_frame_filename,
fail_user_frame_lineno,
non_compliant_ops,
compliant_custom_ops,
)

View File

@ -214,8 +214,11 @@ class KeyErrorMsg:
def augment_exc_message(exc: Exception, msg: str = "\n", export: bool = False) -> None:
import traceback
exc.innermost_user_frame_summary = None # type: ignore[attr-defined]
real_stack = get_real_stack(exc)
if real_stack is not None:
if real_stack is not None and len(real_stack) > 0:
exc.innermost_user_frame_summary = real_stack[-1] # type: ignore[attr-defined]
msg += f"\nfrom user code:\n {''.join(traceback.format_list(real_stack))}"
if config.replay_record_enabled and hasattr(exc, "record_filename"):

View File

@ -592,7 +592,10 @@ class CompilationMetrics:
graph_input_count: Optional[int]
entire_frame_compile_time_s: Optional[float]
backend_compile_time_s: Optional[float]
fail_type: Optional[str]
fail_reason: Optional[str]
fail_user_frame_filename: Optional[str]
fail_user_frame_lineno: Optional[int]
non_compliant_ops: Set[str]
compliant_custom_ops: Set[str]