mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Fix flaky SIGSEGV crash in test_profile_memory (#136304)
Fixes https://github.com/pytorch/pytorch/issues/132331.

We need another barrier here to ensure that the main thread doesn't stop the profiler while other threads are still using it (which makes them crash). I can reliably reproduce the issue with `pytest -v test/profiler/test_cpp_thread.py -k test_profile_memory --flake-finder`.

### Testing

With this change, all tests in `pytest -v test/profiler/test_cpp_thread.py --flake-finder` pass.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/136304
Approved by: https://github.com/briancoutinho
This commit is contained in:
@ -47,6 +47,8 @@ void start_threads(int thread_count, int iteration_count, bool attach) {
|
|||||||
|
|
||||||
static std::atomic<int> barrier = 0;
|
static std::atomic<int> barrier = 0;
|
||||||
barrier = 0;
|
barrier = 0;
|
||||||
|
static std::atomic<int> another_barrier = 0;
|
||||||
|
another_barrier = 0;
|
||||||
thread_local bool enabled_in_main_thread = false;
|
thread_local bool enabled_in_main_thread = false;
|
||||||
|
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> threads;
|
||||||
@ -78,6 +80,14 @@ void start_threads(int thread_count, int iteration_count, bool attach) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ProfilerEventHandler::Handler->emulateTraining(iteration, id);
|
ProfilerEventHandler::Handler->emulateTraining(iteration, id);
|
||||||
|
|
||||||
|
// We need another barrier here to ensure that the main thread doesn't
|
||||||
|
// stop the profiler while other threads are still using it. This fixes
|
||||||
|
// https://github.com/pytorch/pytorch/issues/132331
|
||||||
|
++another_barrier;
|
||||||
|
while (another_barrier % thread_count) {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user