[ARM] Fix infinite recursion in unwind (#134387)

Fixes #119905

Setting `TORCH_SHOW_CPP_STACKTRACES=1` on ARM causes an infinitely recursive unwind because, on failure, a `StackTraceFetcher` attempts to unwind the <ins>failed instruction</ins>: 5ad759ca33/torch/csrc/profiler/combined_traceback.cpp (L25)
then the unwind itself fails:
5ad759ca33/torch/csrc/profiler/unwind/unwind.cpp (L10-L12)
and it causes another attempt to unwind the failure in `unwind()`...

In summary, the executed code is equivalent to:
```C++
std::vector<void*> unwind() {
  // some instructions ...
  return unwind();
}
```
This PR replaces `TORCH_CHECK` with `TORCH_WARN_ONCE`, as the latter will not cause uncontrolled recursion. The only side effect is an empty backtrace.

Huge thanks to @nWEIdia who found the root cause!

Pull Request resolved: https://github.com/pytorch/pytorch/pull/134387
Approved by: https://github.com/eqy, https://github.com/nWEIdia, https://github.com/malfet
This commit is contained in:
Aidyn-A
2024-08-26 21:02:29 +00:00
committed by PyTorch MergeBot
parent 900c5083ed
commit 28a4db84f2
3 changed files with 11 additions and 16 deletions

View File

@ -573,13 +573,8 @@ def run_test(
def try_set_cpp_stack_traces(env, command, set=True):
# Print full c++ stack traces during retries
# Don't do it for macos inductor tests as it makes them
# segfault for some reason
if not (
IS_MACOS and len(command) >= 2 and command[2].startswith(INDUCTOR_TEST_PREFIX)
):
env = env or {}
env["TORCH_SHOW_CPP_STACKTRACES"] = "1" if set else "0"
env = env or {}
env["TORCH_SHOW_CPP_STACKTRACES"] = "1" if set else "0"
return env

View File

@ -170,7 +170,7 @@ static PyObject* THPModule_initExtension(
PyObject* _unused,
PyObject* shm_manager_path) {
HANDLE_TH_ERRORS
#if !defined(FBCODE_CAFFE2)
#if !defined(FBCODE_CAFFE2) && !defined(__aarch64__)
if (torch::get_cpp_stacktraces_enabled()) {
c10::SetStackTraceFetcher([]() -> std::string {
auto tb = torch::CapturedTraceback::gather(false, false, true);

View File

@ -7,29 +7,29 @@
!__has_include("ext/stdio_filebuf.h")
namespace torch::unwind {
std::vector<void*> unwind() {
TORCH_CHECK(
false,
TORCH_WARN_ONCE(
"record_context_cpp is not support on non-linux non-x86_64 platforms");
return {};
}
std::optional<std::pair<std::string, uint64_t>> libraryFor(void* addr) {
TORCH_CHECK(
false,
TORCH_WARN_ONCE(
"record_context_cpp is not support on non-linux non-x86_64 platforms");
return {};
}
#ifndef FBCODE_CAFFE2
std::vector<Frame> symbolize(const std::vector<void*>& frames, Mode mode) {
TORCH_CHECK(
false,
TORCH_WARN_ONCE(
"record_context_cpp is not support on non-linux non-x86_64 platforms");
return {};
}
#endif
Stats stats() {
TORCH_CHECK(
false,
TORCH_WARN_ONCE(
"record_context_cpp is not support on non-linux non-x86_64 platforms");
return {};
}
} // namespace torch::unwind