Files
pytorch/test/profiler/test_python_tracer.py
Denghui Dong 8b0be7b65a [Profiler] Fix unexpected C return events (#159574)
The fix in https://github.com/pytorch/pytorch/pull/155446 addressed the "stack empty" issue that's easily reproducible on CPython 3.12.0-4. While this issue can also appear in other versions, it's not as easy to reproduce there.

I recently found a new cause for this problem.

1df5d00145/Python/ceval.c (L5807-L5836)

In the CPython 3.10 implementation, PyTrace_C_CALL and PyTrace_C_RETURN/PyTrace_C_EXCEPTION are supposed to appear in pairs. However, when c_profilefunc is changed, unexpected PyTrace_C_RETURN/PyTrace_C_EXCEPTION events can occur.

Here is the code to reproduce this problem.

```
import threading
import time
import torch

from threading import Event, Lock

lock = Lock()
lock.acquire()

event1 = Event()
event2 = Event()
event3 = Event()

def run():
    event1.set()
    event2.wait()
    lock.acquire()
    event3.set()

threading.Thread(target=run).start()

with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
    event1.wait()
    event2.set()
    time.sleep(1)

with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
    lock.release()
    event3.wait()
```

<img width="1766" height="1250" alt="image" src="https://github.com/user-attachments/assets/6794eeca-7364-429e-91eb-62cdad116bd3" />

To fix this problem, we can record active_frames_ and remaining_start_frames_ for each thread, and when the PyTrace_C-RETURN/PyTrace_CEXT CEPTION event occurs, we can determine whether to record this event based on these two fields.

In reality, even without this fix, the final data appears to be right since the match process can handle this case (it would just result in an exception log being printed).

Do you think the fix is necessary?

Pull Request resolved: https://github.com/pytorch/pytorch/pull/159574
Approved by: https://github.com/sraikund16
2025-08-07 01:17:55 +00:00

110 lines
2.9 KiB
Python

# Owner(s): ["oncall: profiler"]
import json
import subprocess
import sys
import time
from torch.profiler import profile, ProfilerActivity
from torch.testing._internal.common_utils import (
run_tests,
skipIfPythonVersionMismatch,
TemporaryFileName,
TestCase,
)
class TestPythonTracer(TestCase):
@skipIfPythonVersionMismatch(lambda major, minor, micro: major == 3 and minor == 12)
def test_method_with_c_function(self):
class A:
method_with_c_function = classmethod(repr)
def get_key(x):
A().method_with_c_function()
time.sleep(1.2)
return len(x)
names = ["Alice", "Bob"]
with profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True
) as prof:
sorted(names, key=get_key)
with TemporaryFileName(mode="w+") as fname:
prof.export_chrome_trace(fname)
with open(fname) as f:
events = json.load(f)["traceEvents"]
found = False
for event in events:
if (
event.get("cat", "") == "python_function"
and event.get("name", "") == "<built-in function sorted>"
):
duration = event.get("dur", 0)
if duration >= 2000000:
found = True
break
self.assertTrue(found)
@skipIfPythonVersionMismatch(lambda major, minor, micro: major == 3 and minor == 12)
def test_monitoring_callback(self):
vi = sys.version_info
from sys import monitoring
with profile(
activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True
):
name = monitoring.get_tool(2)
if vi.micro < 5:
self.assertEqual(name, "PyTorch Profiler")
else:
self.assertEqual(name, None)
name = monitoring.get_tool(2)
self.assertEqual(name, None)
def test_unexpected_c_return_events(self):
code = """
import threading
import time
import torch
from threading import Event, Lock
lock = Lock()
lock.acquire()
event1 = Event()
event2 = Event()
event3 = Event()
def run():
event1.set()
event2.wait()
lock.acquire()
event3.set()
threading.Thread(target=run).start()
with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
event1.wait()
event2.set()
time.sleep(1)
with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
lock.release()
event3.wait()
"""
result = subprocess.run(
[sys.executable, "-c", code], capture_output=True, text=True, check=True
)
self.assertFalse(
"Python replay stack is empty during pop operation" in result.stderr
)
if __name__ == "__main__":
run_tests()