The fix in https://github.com/pytorch/pytorch/pull/155446 addressed the "stack empty" issue that is easily reproducible on CPython 3.12.0 through 3.12.4. The issue can also appear on other versions, but it is harder to reproduce there.
I recently found a new cause for this problem.
1df5d00145/Python/ceval.c (L5807-L5836)
In the CPython 3.10 implementation, PyTrace_C_CALL and PyTrace_C_RETURN/PyTrace_C_EXCEPTION events are supposed to appear in pairs. However, when c_profilefunc is changed while a C function is still executing, the profiler can receive a PyTrace_C_RETURN/PyTrace_C_EXCEPTION event without ever having seen the matching PyTrace_C_CALL.
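For context, here is a small illustration (not taken from this PR) of the normal pairing contract: sys.setprofile installs a per-thread c_profilefunc, the same mechanism the tracer hooks into, and a call into a C builtin such as len() is reported as a matching c_call/c_return pair.
```python
import sys

seen = []

def tracer(frame, event, arg):
    # Only keep the C-level events for len() so the output stays small;
    # arg is the C function object for c_call / c_return / c_exception.
    if event in ("c_call", "c_return", "c_exception") and getattr(arg, "__name__", "") == "len":
        seen.append(event)

sys.setprofile(tracer)
len([1, 2, 3])
sys.setprofile(None)

print(seen)  # expected: ['c_call', 'c_return']
```
The problem described above is exactly a violation of this pairing: the profiler is installed while a thread is already inside a C call, so only the return half of the pair is observed.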
Here is the code to reproduce this problem.
```python
import threading
import time
import torch
from threading import Event, Lock

lock = Lock()
lock.acquire()
event1 = Event()
event2 = Event()
event3 = Event()

def run():
    event1.set()
    event2.wait()
    lock.acquire()
    event3.set()

threading.Thread(target=run).start()

with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
    event1.wait()
    event2.set()
    time.sleep(1)

with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
    lock.release()
    event3.wait()
```
![screenshot](https://github.com/user-attachments/assets/6794eeca-7364-429e-91eb-62cdad116bd3)
To fix this problem, we can record active_frames_ and remaining_start_frames_ for each thread, and when a PyTrace_C_RETURN/PyTrace_C_EXCEPTION event occurs, decide whether to record it based on these two fields.
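The actual change lives in the C++ Python tracer, so the following is only a rough sketch of the gating idea (names and logic simplified, not the real implementation): a C return/exception is recorded only if a matching C call was previously observed on that thread.
```python
# Rough sketch of the gating idea only; the real fix tracks the
# active_frames_ / remaining_start_frames_ fields mentioned above in C++.
from collections import defaultdict

observed_c_calls = defaultdict(int)  # thread id -> C calls seen but not yet returned

def on_c_call(tid, events):
    observed_c_calls[tid] += 1
    events.append((tid, "c_call"))

def on_c_return_or_exception(tid, events):
    if observed_c_calls[tid] == 0:
        # The matching c_call happened before profiling started on this thread
        # (e.g. it was already blocked inside lock.acquire()), so there is
        # nothing on the replay stack to pop; drop the event instead.
        return
    observed_c_calls[tid] -= 1
    events.append((tid, "c_return"))
```
With a guard like this, the unpaired C return from the thread that was already blocked in lock.acquire() is skipped instead of triggering the empty-stack pop.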
In practice, even without this fix the final data appears to be correct, since the matching process can handle this case (it just results in an exception log being printed).
Do you think the fix is necessary?
Pull Request resolved: https://github.com/pytorch/pytorch/pull/159574
Approved by: https://github.com/sraikund16
110 lines | 2.9 KiB | Python
# Owner(s): ["oncall: profiler"]
import json
import subprocess
import sys
import time

from torch.profiler import profile, ProfilerActivity
from torch.testing._internal.common_utils import (
    run_tests,
    skipIfPythonVersionMismatch,
    TemporaryFileName,
    TestCase,
)


class TestPythonTracer(TestCase):
    @skipIfPythonVersionMismatch(lambda major, minor, micro: major == 3 and minor == 12)
    def test_method_with_c_function(self):
        # Each get_key call sleeps 1.2s, so sorting two names under the profiler
        # should produce a "<built-in function sorted>" event lasting >= 2s.
        class A:
            method_with_c_function = classmethod(repr)

        def get_key(x):
            A().method_with_c_function()
            time.sleep(1.2)
            return len(x)

        names = ["Alice", "Bob"]

        with profile(
            activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True
        ) as prof:
            sorted(names, key=get_key)

        with TemporaryFileName(mode="w+") as fname:
            prof.export_chrome_trace(fname)
            with open(fname) as f:
                events = json.load(f)["traceEvents"]
                found = False
                for event in events:
                    if (
                        event.get("cat", "") == "python_function"
                        and event.get("name", "") == "<built-in function sorted>"
                    ):
                        duration = event.get("dur", 0)
                        if duration >= 2000000:
                            found = True
                            break
                self.assertTrue(found)

    @skipIfPythonVersionMismatch(lambda major, minor, micro: major == 3 and minor == 12)
    def test_monitoring_callback(self):
        # On CPython 3.12.0-3.12.4 the profiler registers itself as
        # sys.monitoring tool 2 while profiling; on later micro versions it
        # does not. In both cases the slot is free again afterwards.
        vi = sys.version_info
        from sys import monitoring

        with profile(
            activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], with_stack=True
        ):
            name = monitoring.get_tool(2)
            if vi.micro < 5:
                self.assertEqual(name, "PyTorch Profiler")
            else:
                self.assertEqual(name, None)
        name = monitoring.get_tool(2)
        self.assertEqual(name, None)

    def test_unexpected_c_return_events(self):
        # Regression test: a thread already blocked in lock.acquire() when
        # profiling starts triggers a C return without a matching C call; this
        # must not print "Python replay stack is empty during pop operation".
        code = """
import threading
import time
import torch

from threading import Event, Lock

lock = Lock()
lock.acquire()
event1 = Event()
event2 = Event()
event3 = Event()

def run():
    event1.set()
    event2.wait()
    lock.acquire()
    event3.set()

threading.Thread(target=run).start()

with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
    event1.wait()
    event2.set()
    time.sleep(1)

with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU], with_stack=True):
    lock.release()
    event3.wait()
"""

        result = subprocess.run(
            [sys.executable, "-c", code], capture_output=True, text=True, check=True
        )

        self.assertFalse(
            "Python replay stack is empty during pop operation" in result.stderr
        )


if __name__ == "__main__":
    run_tests()