mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[ARM] Fix infinite recursion in unwind (#134387)
Fixes #119905. The `TORCH_SHOW_CPP_STACKTRACES=1` setting on ARM causes an infinite recursive unwind because, on failure, a `StackTraceFetcher` attempts to unwind the failed instruction: 5ad759ca33/torch/csrc/profiler/combined_traceback.cpp (L25)
then the unwind itself fails: 5ad759ca33/torch/csrc/profiler/unwind/unwind.cpp (L10-L12)
and that causes another attempt to unwind the failure in `unwind()`, and so on. In summary, the executed code is equivalent to:
```C++
std::vector<void*> unwind() {
  // some instructions
  ...
  return unwind();
}
```
This PR replaces `TORCH_CHECK` with `TORCH_WARN_ONCE`, which does not cause uncontrolled recursion. The only side effect is an empty back-trace. Huge thanks to @nWEIdia, who found the root cause!
Pull Request resolved: https://github.com/pytorch/pytorch/pull/134387
Approved by: https://github.com/eqy, https://github.com/nWEIdia, https://github.com/malfet
This commit is contained in:
committed by
PyTorch MergeBot
parent
900c5083ed
commit
28a4db84f2
@ -573,13 +573,8 @@ def run_test(
|
||||
|
||||
def try_set_cpp_stack_traces(env, command, set=True):
|
||||
# Print full c++ stack traces during retries
|
||||
# Don't do it for macos inductor tests as it makes them
|
||||
# segfault for some reason
|
||||
if not (
|
||||
IS_MACOS and len(command) >= 2 and command[2].startswith(INDUCTOR_TEST_PREFIX)
|
||||
):
|
||||
env = env or {}
|
||||
env["TORCH_SHOW_CPP_STACKTRACES"] = "1" if set else "0"
|
||||
env = env or {}
|
||||
env["TORCH_SHOW_CPP_STACKTRACES"] = "1" if set else "0"
|
||||
return env
|
||||
|
||||
|
||||
|
@ -170,7 +170,7 @@ static PyObject* THPModule_initExtension(
|
||||
PyObject* _unused,
|
||||
PyObject* shm_manager_path) {
|
||||
HANDLE_TH_ERRORS
|
||||
#if !defined(FBCODE_CAFFE2)
|
||||
#if !defined(FBCODE_CAFFE2) && !defined(__aarch64__)
|
||||
if (torch::get_cpp_stacktraces_enabled()) {
|
||||
c10::SetStackTraceFetcher([]() -> std::string {
|
||||
auto tb = torch::CapturedTraceback::gather(false, false, true);
|
||||
|
@ -7,29 +7,29 @@
|
||||
!__has_include("ext/stdio_filebuf.h")
|
||||
namespace torch::unwind {
|
||||
std::vector<void*> unwind() {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
TORCH_WARN_ONCE(
|
||||
"record_context_cpp is not support on non-linux non-x86_64 platforms");
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<std::pair<std::string, uint64_t>> libraryFor(void* addr) {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
TORCH_WARN_ONCE(
|
||||
"record_context_cpp is not support on non-linux non-x86_64 platforms");
|
||||
return {};
|
||||
}
|
||||
|
||||
#ifndef FBCODE_CAFFE2
|
||||
std::vector<Frame> symbolize(const std::vector<void*>& frames, Mode mode) {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
TORCH_WARN_ONCE(
|
||||
"record_context_cpp is not support on non-linux non-x86_64 platforms");
|
||||
return {};
|
||||
}
|
||||
#endif
|
||||
|
||||
Stats stats() {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
TORCH_WARN_ONCE(
|
||||
"record_context_cpp is not support on non-linux non-x86_64 platforms");
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace torch::unwind
|
||||
|
Reference in New Issue
Block a user