Use LLVMSymbolizer directly for unwind inside fbcode (#108800)

Using LLVMSymbolizer directly avoids having to call fork, which has caused timeouts in some circumstances.

Differential Revision: [D49070589](https://our.internmc.facebook.com/intern/diff/D49070589/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/108800
Approved by: https://github.com/aaronenyeshi
2025-10-20 21:14:14 +08:00 · 2023-10-03 16:42:11 -07:00
parent 510ec7e3c5
commit 3fe3439242
4 changed files with 69 additions and 11 deletions
--- a/build_variables.bzl
+++ b/build_variables.bzl
@ -403,6 +403,7 @@ core_sources_full_mobile_no_backend_interface_xplat = [
    "torch/csrc/jit/tensorexpr/unique_name_manager.cpp",
    "torch/csrc/jit/testing/file_check.cpp",
    "torch/csrc/profiler/unwind/unwind.cpp",
+    "torch/csrc/profiler/unwind/unwind_fb.cpp",
    "torch/csrc/profiler/combined_traceback.cpp",
    "torch/csrc/jit/testing/hooks_for_testing.cpp",
    "torch/csrc/utils/cpp_stacktraces.cpp",
--- a/torch/csrc/profiler/unwind/unwind.cpp
+++ b/torch/csrc/profiler/unwind/unwind.cpp
@ -27,6 +27,7 @@ Stats stats() {
 } // namespace torch

 #else
+
 #include <c10/util/flat_hash_map.h>
 #include <elf.h>
 #include <link.h>
@ -319,6 +320,19 @@ std::vector<void*> unwind() {
  return frames;
 }

+// Resolves addr to the library that the unwind cache associates with it.
+// Returns (library name, addr minus that library's load bias), or
+// c10::nullopt when addr is null or the cache finds no library for it.
+c10::optional<std::pair<std::string, uint64_t>> libraryFor(void* addr) {
+  if (addr == nullptr) {
+    return c10::nullopt;
+  }
+  const auto address = reinterpret_cast<uint64_t>(addr);
+  // The shared lock on cache_mutex_ covers both the lookup and the reads
+  // of the returned LibraryInfo below.
+  std::shared_lock lock(cache_mutex_);
+  const LibraryInfo* found = unwind_cache.findLibraryFor(address);
+  if (found == nullptr) {
+    return c10::nullopt;
+  }
+  const uint64_t offset = address - found->load_bias();
+  return std::make_pair(found->name(), offset);
+}
+
 struct Symbolizer {
  static std::lock_guard<std::mutex> guard() {
    static std::mutex mutex;
@ -332,16 +346,15 @@ struct Symbolizer {
    if (frame_map_.count(addr)) {
      return;
    }
-    auto maybe_library =
-        addr ? unwind_cache.findLibraryFor((uint64_t)addr) : nullptr;
+    auto maybe_library = libraryFor(addr);
    if (!maybe_library) {
      frame_map_[addr] = Frame{"??", "<unwind unsupported>", 0};
      return;
    }
    has_pending_results_ = true;
-    auto& entry = getOrCreate(maybe_library->name());
+    auto& entry = getOrCreate(maybe_library->first);
    entry.queried.push_back(addr);
-    auto libaddress = ((uint64_t)addr - maybe_library->load_bias() - 1);
+    auto libaddress = maybe_library->second - 1;
    entry.comm->out() << (void*)libaddress << "\n";
    // we need to make sure we don't write more than 64k bytes to
    // a pipe before reading the results. Otherwise the buffer may
@ -406,13 +419,8 @@ struct Symbolizer {
  };
 };

-#ifdef FBCODE_CAFFE2
-// in CUDA binaries, we have to use the internal symbolizer because
-// addr2line seems to hang.
-__attribute__((weak))
-#endif
-std::vector<Frame>
-symbolize(const std::vector<void*>& frames) {
+#ifndef FBCODE_CAFFE2
+std::vector<Frame> symbolize(const std::vector<void*>& frames) {
  auto guard = Symbolizer::guard();
  Symbolizer& s = Symbolizer::get();
  for (auto f : frames) {
@ -425,6 +433,7 @@ symbolize(const std::vector<void*>& frames) {
  }
  return results;
 }
+#endif

 Stats stats() {
  return unwind_cache.stats();
--- a/torch/csrc/profiler/unwind/unwind.h
+++ b/torch/csrc/profiler/unwind/unwind.h
@ -1,5 +1,6 @@
 #pragma once
 #include <c10/macros/Export.h>
+#include <c10/util/Optional.h>
 #include <string>
 #include <vector>

@ -23,6 +24,10 @@ struct Frame {
 // symbolize.
 TORCH_API std::vector<Frame> symbolize(const std::vector<void*>& frames);

+// Returns the path to the library containing addr and the offset of addr inside it (addr minus the library's load bias), or c10::nullopt when addr is null or no library is found for it.
+TORCH_API c10::optional<std::pair<std::string, uint64_t>> libraryFor(
+    void* addr);
+
 struct Stats {
  size_t hits = 0;
  size_t misses = 0;
--- a/torch/csrc/profiler/unwind/unwind_fb.cpp
+++ b/torch/csrc/profiler/unwind/unwind_fb.cpp
@ -0,0 +1,43 @@
+#if defined(__linux__) && defined(__x86_64__) && defined(__has_include) && \
+    __has_include("ext/stdio_filebuf.h") && defined(FBCODE_CAFFE2)
+#include <c10/util/flat_hash_map.h>
+#include <llvm/DebugInfo/Symbolize/Symbolize.h>
+#include <llvm/Support/Error.h>
+#include <torch/csrc/profiler/unwind/unwind.h>
+
+namespace torch {
+namespace unwind {
+
+// Symbolizes every address in `frames` in-process with LLVMSymbolizer and
+// returns one Frame per input address, in the same order.
+//
+// Addresses for which libraryFor() finds no library, or which LLVM fails to
+// symbolize, produce the placeholder Frame{"??", "<unwind unsupported>", 0}.
+// Results (including placeholders) are memoized per address in a
+// function-local static map, and the whole call runs under a function-local
+// static mutex that also guards the shared symbolizer and the map.
+std::vector<Frame> symbolize(const std::vector<void*>& frames) {
+  static std::mutex symbolize_mutex;
+  static llvm::symbolize::LLVMSymbolizer symbolizer;
+  static ska::flat_hash_map<void*, Frame> frame_map_;
+
+  std::lock_guard<std::mutex> guard(symbolize_mutex);
+  std::vector<Frame> results;
+  results.reserve(frames.size());
+  for (auto addr : frames) {
+    // Single lookup; the iterator is reused for both the miss check and the
+    // final read instead of hashing addr three times.
+    auto cached = frame_map_.find(addr);
+    if (cached == frame_map_.end()) {
+      auto frame = Frame{"??", "<unwind unsupported>", 0};
+      auto maybe_library = libraryFor(addr);
+      if (maybe_library) {
+        // Same "- 1" adjustment the addr2line-based symbolizer applies;
+        // presumably so a return address resolves to the call site rather
+        // than the following instruction -- TODO confirm.
+        auto libaddress = maybe_library->second - 1;
+        auto r = symbolizer.symbolizeCode(
+            maybe_library->first,
+            {libaddress, llvm::object::SectionedAddress::UndefSection});
+        if (r) {
+          frame.filename = r->FileName;
+          frame.funcname = r->FunctionName;
+          frame.lineno = r->Line;
+        } else {
+          // A failed llvm::Expected must have its Error handled before it is
+          // destroyed (LLVM aborts on an unhandled Error when ABI-breaking
+          // checks are enabled). We deliberately fall back to the
+          // placeholder frame, so just discard the error.
+          llvm::consumeError(r.takeError());
+        }
+      }
+      cached = frame_map_.emplace(addr, std::move(frame)).first;
+    }
+    results.emplace_back(cached->second);
+  }
+  return results;
+}
+
+} // namespace unwind
+} // namespace torch
+
+#endif