From e8a9d088c694a49c414152ef6ea28b475e8f85f2 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Fri, 29 Dec 2023 05:15:35 +0000
Subject: [PATCH] [DevX] Add tool and doc on partial debug builds (#116521)

Turned the command sequence mentioned in
https://dev-discuss.pytorch.org/t/how-to-get-a-fast-debug-build/1597
and in various discussions into a tool that I use almost daily to debug
crashes or correctness issues in the codebase.

Essentially, it allows one to turn this:
```
Process 87729 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x00000001023d55a8 libtorch_python.dylib`at::indexing::impl::applySelect(at::Tensor const&, long long, c10::SymInt, long long, c10::Device const&, std::__1::optional> const&)
libtorch_python.dylib`at::indexing::impl::applySelect:
->  0x1023d55a8 <+0>:  sub sp, sp, #0xd0
    0x1023d55ac <+4>:  stp x24, x23, [sp, #0x90]
    0x1023d55b0 <+8>:  stp x22, x21, [sp, #0xa0]
    0x1023d55b4 <+12>: stp x20, x19, [sp, #0xb0]
```
into this:
```
Process 87741 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x00000001024e2628 libtorch_python.dylib`at::indexing::impl::applySelect(self=0x00000001004ee8a8, dim=0, index=(data_ = 3), real_dim=0, (null)=0x000000016fdfe535, self_sizes= Has Value=true ) at TensorIndexing.h:239:7
   236      const at::Device& /*self_device*/,
   237      const c10::optional& self_sizes) {
   238    // See NOTE [nested tensor size for indexing]
-> 239    if (self_sizes.has_value()) {
   240      auto maybe_index = index.maybe_as_int();
   241      if (maybe_index.has_value()) {
   242        TORCH_CHECK_INDEX(
```
while retaining good performance for the rest of the codebase.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116521
Approved by: https://github.com/atalman
---
 CONTRIBUTING.md             |  61 +++++++++++++++++++
 tools/build_with_debinfo.py | 115 ++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100755 tools/build_with_debinfo.py

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 38ffa5977e0e..270e20a0b99b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,6 +41,7 @@ aspects of contributing to PyTorch.
 - [Use a faster linker](#use-a-faster-linker)
 - [Use pre-compiled headers](#use-pre-compiled-headers)
 - [Workaround for header dependency bug in nvcc](#workaround-for-header-dependency-bug-in-nvcc)
+ - [Rebuild a few files with debug information](#rebuild-a-few-files-with-debug-information)
 - [C++ frontend development tips](#c-frontend-development-tips)
 - [GDB integration](#gdb-integration)
 - [C++ stacktraces](#c-stacktraces)
@@ -811,6 +812,66 @@ export CMAKE_CUDA_COMPILER_LAUNCHER="python;`pwd`/tools/nvcc_fix_deps.py;ccache"
 python setup.py develop
 ```
 
+### Rebuild a few files with debug information
+
+While debugging a problem, one often has to maintain a debug build in a separate folder.
+But often only a few files need to be rebuilt with debug info to get a symbolicated backtrace or to enable source debugging.
+One can easily solve this with the help of `tools/build_with_debinfo.py`.
+
+For example, suppose one wants to debug what is going on while a tensor index is selected, which can be achieved by setting a breakpoint at the `applySelect` function:
+```
+% lldb -o "b applySelect" -o "process launch" -- python3 -c "import torch;print(torch.rand(5)[3])"
+(lldb) target create "python"
+Current executable set to '/usr/bin/python3' (arm64).
+(lldb) settings set -- target.run-args "-c" "import torch;print(torch.rand(5)[3])"
+(lldb) b applySelect
+Breakpoint 1: no locations (pending).
+WARNING: Unable to resolve breakpoint to any actual locations.
+(lldb) process launch
+2 locations added to breakpoint 1
+Process 87729 stopped
+* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
+    frame #0: 0x00000001023d55a8 libtorch_python.dylib`at::indexing::impl::applySelect(at::Tensor const&, long long, c10::SymInt, long long, c10::Device const&, std::__1::optional> const&)
+libtorch_python.dylib`at::indexing::impl::applySelect:
+->  0x1023d55a8 <+0>:  sub sp, sp, #0xd0
+    0x1023d55ac <+4>:  stp x24, x23, [sp, #0x90]
+    0x1023d55b0 <+8>:  stp x22, x21, [sp, #0xa0]
+    0x1023d55b4 <+12>: stp x20, x19, [sp, #0xb0]
+Target 0: (python) stopped.
+Process 87729 launched: '/usr/bin/python' (arm64)
+```
+This is not very informative, but it can easily be remedied by rebuilding `python_variable_indexing.cpp` with debug information:
+```
+% ./tools/build_with_debinfo.py torch/csrc/autograd/python_variable_indexing.cpp
+[1 / 2] Building caffe2/torch/CMakeFiles/torch_python.dir/csrc/autograd/python_variable_indexing.cpp.o
+[2 / 2] Building lib/libtorch_python.dylib
+```
+And afterwards:
+```
+% lldb -o "b applySelect" -o "process launch" -- python3 -c "import torch;print(torch.rand(5)[3])"
+(lldb) target create "python"
+Current executable set to '/usr/bin/python3' (arm64).
+(lldb) settings set -- target.run-args "-c" "import torch;print(torch.rand(5)[3])"
+(lldb) b applySelect
+Breakpoint 1: no locations (pending).
+WARNING: Unable to resolve breakpoint to any actual locations.
+(lldb) process launch
+2 locations added to breakpoint 1
+Process 87741 stopped
+* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
+    frame #0: 0x00000001024e2628 libtorch_python.dylib`at::indexing::impl::applySelect(self=0x00000001004ee8a8, dim=0, index=(data_ = 3), real_dim=0, (null)=0x000000016fdfe535, self_sizes= Has Value=true ) at TensorIndexing.h:239:7
+   236      const at::Device& /*self_device*/,
+   237      const c10::optional& self_sizes) {
+   238    // See NOTE [nested tensor size for indexing]
+-> 239    if (self_sizes.has_value()) {
+   240      auto maybe_index = index.maybe_as_int();
+   241      if (maybe_index.has_value()) {
+   242        TORCH_CHECK_INDEX(
+Target 0: (python) stopped.
+Process 87741 launched: '/usr/bin/python3' (arm64)
+```
+This is much more useful, isn't it?
+
 ### C++ frontend development tips
 
 We have very extensive tests in the [test/cpp/api](test/cpp/api) folder. The
diff --git a/tools/build_with_debinfo.py b/tools/build_with_debinfo.py
new file mode 100755
index 000000000000..0f2bc9b7379c
--- /dev/null
+++ b/tools/build_with_debinfo.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+# Tool to quickly rebuild one or two files with debug info
+# Mimics the following behavior:
+# - touch file
+# - ninja -j1 -v -n torch_python | sed -e 's/-O[23]/-g/g' -e 's#\[[0-9]\+\/[0-9]\+\] \+##' |sh
+# - Copy libs from build/lib to torch/lib folder
+
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any, List, Optional, Tuple
+
+PYTORCH_ROOTDIR = Path(__file__).resolve().parent.parent
+TORCH_DIR = PYTORCH_ROOTDIR / "torch"
+TORCH_LIB_DIR = TORCH_DIR / "lib"
+BUILD_DIR = PYTORCH_ROOTDIR / "build"
+BUILD_LIB_DIR = BUILD_DIR / "lib"
+
+
+def check_output(args: List[str], cwd: Optional[str] = None) -> str:
+    return subprocess.check_output(args, cwd=cwd).decode("utf-8")
+
+
+def parse_args() -> Any:
+    from argparse import ArgumentParser
+
+    parser = ArgumentParser(description="Incremental build PyTorch with debinfo")
+    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument("files", nargs="?", action="append")
+    return parser.parse_args()
+
+
+def get_lib_extension() -> str:
+    if sys.platform == "linux":
+        return "so"
+    if sys.platform == "darwin":
+        return "dylib"
+    raise RuntimeError(f"Unsupported platform {sys.platform}")
+
+
+def create_symlinks() -> None:
+    """Creates symlinks from build/lib to torch/lib"""
+    if not TORCH_LIB_DIR.exists():
+        raise RuntimeError(f"Can't create symlinks as {TORCH_LIB_DIR} does not exist")
+    if not BUILD_LIB_DIR.exists():
+        raise RuntimeError(f"Can't create symlinks as {BUILD_LIB_DIR} does not exist")
+    for torch_lib in TORCH_LIB_DIR.glob(f"*.{get_lib_extension()}"):
+        if torch_lib.is_symlink():
+            continue
+        build_lib = BUILD_LIB_DIR / torch_lib.name
+        if not build_lib.exists():
+            raise RuntimeError(f"Can't find {build_lib} corresponding to {torch_lib}")
+        torch_lib.unlink()
+        torch_lib.symlink_to(build_lib)
+
+
+def has_build_ninja() -> bool:
+    return (BUILD_DIR / "build.ninja").exists()
+
+
+def is_devel_setup() -> bool:
+    output = check_output([sys.executable, "-c", "import torch;print(torch.__file__)"])
+    return output.strip() == str(TORCH_DIR / "__init__.py")
+
+
+def create_build_plan() -> List[Tuple[str, str]]:
+    output = check_output(
+        ["ninja", "-j1", "-v", "-n", "torch_python"], cwd=str(BUILD_DIR)
+    )
+    rc = []
+    for line in output.split("\n"):
+        if not line.startswith("["):
+            continue
+        line = line.split("]", 1)[1].strip()
+        if line.startswith(": &&") and line.endswith("&& :"):
+            line = line[4:-4]
+        line = line.replace("-O2", "-g").replace("-O3", "-g")
+        name = line.split("-o ", 1)[1].split(" ")[0]
+        rc.append((name, line))
+    return rc
+
+
+def main() -> None:
+    if sys.platform == "win32":
+        print("Not supported on Windows yet")
+        sys.exit(-95)
+    if not is_devel_setup():
+        print(
+            "Not a devel setup of PyTorch, please run `python3 setup.py develop --user` first"
+        )
+        sys.exit(-1)
+    if not has_build_ninja():
+        print("Only the ninja build system is supported at the moment")
+        sys.exit(-1)
+    args = parse_args()
+    for file in args.files:
+        if file is None:
+            continue
+        Path(file).touch()
+    build_plan = create_build_plan()
+    if len(build_plan) == 0:
+        return print("Nothing to do")
+    if len(build_plan) > 100:
+        print("More than 100 items need to be rebuilt, run `ninja torch_python` first")
+        sys.exit(-1)
+    for idx, (name, cmd) in enumerate(build_plan):
+        print(f"[{idx + 1} / {len(build_plan)}] Building {name}")
+        if args.verbose:
+            print(cmd)
+        subprocess.check_call(["sh", "-c", cmd], cwd=BUILD_DIR)
+    create_symlinks()
+
+
+if __name__ == "__main__":
+    main()
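
For readers who only want the gist of what the tool automates, below is a minimal, self-contained sketch of the same dry-run-and-rewrite idea for a single file. It assumes the script is run from the repository root with a ninja-based `build/` directory and the `torch_python` target; it deliberately skips the devel-setup checks and the `torch/lib` symlink step that `tools/build_with_debinfo.py` performs, and the file path at the bottom is just the example used in the walkthrough above.
```
#!/usr/bin/env python3
# Minimal sketch (assumes a ninja build dir at ./build and the torch_python target);
# see tools/build_with_debinfo.py for the complete tool.
import subprocess
from pathlib import Path

BUILD_DIR = Path("build")


def rebuild_with_debinfo(source: str) -> None:
    # Touch the source so ninja considers it out of date.
    Path(source).touch()
    # Ask ninja for a verbose dry run: it prints, but does not execute, the commands.
    plan = subprocess.check_output(
        ["ninja", "-j1", "-v", "-n", "torch_python"], cwd=BUILD_DIR
    ).decode()
    for line in plan.splitlines():
        # Only "[N/M] <command>" lines describe build steps.
        if not line.startswith("["):
            continue
        cmd = line.split("]", 1)[1].strip()
        # Unwrap ninja's ": && <command> && :" shell wrapper around link steps.
        if cmd.startswith(": &&") and cmd.endswith("&& :"):
            cmd = cmd[4:-4]
        # Swap optimization flags for debug info, then run the rewritten command.
        cmd = cmd.replace("-O2", "-g").replace("-O3", "-g")
        subprocess.check_call(["sh", "-c", cmd], cwd=BUILD_DIR)


if __name__ == "__main__":
    rebuild_with_debinfo("torch/csrc/autograd/python_variable_indexing.cpp")
```
The key point is that ninja's `-n -v` dry run prints the exact compile and link commands it would execute, so replacing `-O2`/`-O3` with `-g` and re-running only those commands produces debug info for just the touched translation units while the rest of the build keeps its optimized objects.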