Files
pytorch/tools/build_with_debinfo.py
Nikita Shulga e8a9d088c6 [DevX] Add tool and doc on partial debug builds (#116521)
Turned the command sequence mentioned in https://dev-discuss.pytorch.org/t/how-to-get-a-fast-debug-build/1597 and in various discussions into a tool that I use almost daily to debug crashes or correctness issues in the codebase.

Essentially it allows one to turn this:
```
Process 87729 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x00000001023d55a8 libtorch_python.dylib`at::indexing::impl::applySelect(at::Tensor const&, long long, c10::SymInt, long long, c10::Device const&, std::__1::optional<c10::ArrayRef<c10::SymInt>> const&)
libtorch_python.dylib`at::indexing::impl::applySelect:
->  0x1023d55a8 <+0>:  sub    sp, sp, #0xd0
    0x1023d55ac <+4>:  stp    x24, x23, [sp, #0x90]
    0x1023d55b0 <+8>:  stp    x22, x21, [sp, #0xa0]
    0x1023d55b4 <+12>: stp    x20, x19, [sp, #0xb0]
```
into this
```
Process 87741 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x00000001024e2628 libtorch_python.dylib`at::indexing::impl::applySelect(self=0x00000001004ee8a8, dim=0, index=(data_ = 3), real_dim=0, (null)=0x000000016fdfe535, self_sizes= Has Value=true ) at TensorIndexing.h:239:7
   236 	    const at::Device& /*self_device*/,
   237 	    const c10::optional<SymIntArrayRef>& self_sizes) {
   238 	  // See NOTE [nested tensor size for indexing]
-> 239 	  if (self_sizes.has_value()) {
   240 	    auto maybe_index = index.maybe_as_int();
   241 	    if (maybe_index.has_value()) {
   242 	      TORCH_CHECK_INDEX(
```
while retaining good performance for the rest of the codebase
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116521
Approved by: https://github.com/atalman
2023-12-29 05:15:35 +00:00

116 lines
3.7 KiB
Python
Executable File

#!/usr/bin/env python3
# Tool to quickly rebuild one or two files with debug info
# Mimics the following behavior:
# - touch file
# - ninja -j1 -v -n torch_python | sed -e 's/-O[23]/-g/g' -e 's#\[[0-9]\+\/[0-9]\+\] \+##' |sh
# - Copy libs from build/lib to torch/lib folder
import subprocess
import sys
from pathlib import Path
from typing import Any, List, Optional, Tuple
# Checkout root: the parent of the directory that contains this script.
# The script path is resolved first, so the result is absolute and symlink-free.
PYTORCH_ROOTDIR = Path(__file__).resolve().parent.parent
# The `torch` Python package and the folder holding its shared libraries.
TORCH_DIR = PYTORCH_ROOTDIR.joinpath("torch")
TORCH_LIB_DIR = TORCH_DIR.joinpath("lib")
# The build tree (where build.ninja is looked up) and its library output folder.
BUILD_DIR = PYTORCH_ROOTDIR.joinpath("build")
BUILD_LIB_DIR = BUILD_DIR.joinpath("lib")
def check_output(args: List[str], cwd: Optional[str] = None) -> str:
    """Run a command and return everything it wrote to stdout as text.

    Args:
        args: Program and arguments, passed to the OS without a shell.
        cwd: Directory to run the command in; ``None`` keeps the current one.

    Returns:
        The captured standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    raw_stdout = subprocess.check_output(args, cwd=cwd)
    return str(raw_stdout, encoding="utf-8")
def parse_args() -> Any:
    """Parse the command line of the tool (read from ``sys.argv``).

    Returns:
        The argparse namespace with two attributes:
        ``verbose`` (bool, set by ``--verbose``) and ``files`` (list of the
        positional file arguments, empty when none were given).
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Incremental build PyTorch with debinfo")
    parser.add_argument("--verbose", action="store_true")
    # "*" collects every positional argument into a list, so several files
    # can be rebuilt in one invocation and no placeholder entry is produced
    # when the tool is run without files.
    parser.add_argument("files", nargs="*")
    return parser.parse_args()
def get_lib_extension() -> str:
    """Return the shared-library file suffix (without the dot) for this OS.

    Returns:
        ``"so"`` when ``sys.platform`` is ``linux``, ``"dylib"`` when it is
        ``darwin``.

    Raises:
        RuntimeError: for any other value of ``sys.platform``.
    """
    suffix = {"linux": "so", "darwin": "dylib"}.get(sys.platform)
    if suffix is None:
        raise RuntimeError(f"Usupported platform {sys.platform}")
    return suffix
def create_symlinks() -> None:
    """Replace regular library files in torch/lib with links into build/lib.

    Every file in TORCH_LIB_DIR that has the platform's shared-library suffix
    and is not already a symlink is deleted and re-created as a symlink to
    the file of the same name in BUILD_LIB_DIR.

    Raises:
        RuntimeError: if TORCH_LIB_DIR or BUILD_LIB_DIR does not exist, if the
            platform has no known library suffix, or if a library found in
            TORCH_LIB_DIR has no counterpart in BUILD_LIB_DIR.
    """
    for required_dir in (TORCH_LIB_DIR, BUILD_LIB_DIR):
        if not required_dir.exists():
            raise RuntimeError(
                f"Can't create symlinks as {required_dir} does not exist"
            )
    pattern = f"*.{get_lib_extension()}"
    for installed in TORCH_LIB_DIR.glob(pattern):
        if not installed.is_symlink():
            target = BUILD_LIB_DIR / installed.name
            if not target.exists():
                raise RuntimeError(
                    f"Can't find {target} corresponding to {installed}"
                )
            # Remove the stale copy first: symlink_to() fails on an existing path.
            installed.unlink()
            installed.symlink_to(target)
def has_build_ninja() -> bool:
    """Tell whether BUILD_DIR contains an entry named ``build.ninja``."""
    ninja_manifest = BUILD_DIR / "build.ninja"
    return ninja_manifest.exists()
def is_devel_setup() -> bool:
    """Check that ``import torch`` resolves to this source checkout.

    Starts the current interpreter in a subprocess, asks it where ``torch``
    was imported from, and compares that location with
    ``TORCH_DIR / "__init__.py"``.

    Returns:
        True when the imported package is the one under TORCH_DIR. False
        otherwise, including when the subprocess fails (for example because
        ``torch`` cannot be imported at all).
    """
    try:
        output = check_output(
            [sys.executable, "-c", "import torch;print(torch.__file__)"]
        )
    except subprocess.CalledProcessError:
        # A failing import means there is no usable devel setup; report that
        # to the caller instead of aborting with an unrelated traceback.
        return False
    reported = output.strip()
    if not reported:
        return False
    # TORCH_DIR is derived from a symlink-resolved path, so resolve the
    # reported location as well before comparing; a raw string comparison
    # fails when the checkout is reached through a symlink.
    return Path(reported).resolve() == TORCH_DIR / "__init__.py"
def create_build_plan() -> List[Tuple[str, str]]:
    """List the commands ninja would run for ``torch_python``, built with -g.

    Performs a verbose ninja dry run (``-n``) in BUILD_DIR and rewrites every
    reported command so that ``-O2``/``-O3`` become ``-g``.

    Returns:
        ``(name, command)`` tuples in the order ninja printed them. ``name``
        is the token following the first ``-o `` in the command; for commands
        without an ``-o `` option the command itself is used as the name.
    """
    output = check_output(
        ["ninja", "-j1", "-v", "-n", "torch_python"], cwd=str(BUILD_DIR)
    )
    rc = []
    for line in output.split("\n"):
        # Only lines with a "[...]" progress prefix describe build steps.
        if not line.startswith("["):
            continue
        _, closed, command = line.partition("]")
        if not closed:
            # Malformed progress prefix: nothing that could be executed.
            continue
        command = command.strip()
        # Drop the shell no-ops (": &&" ... "&& :") wrapped around some steps.
        if command.startswith(": &&") and command.endswith("&& :"):
            command = command[4:-4]
        command = command.replace("-O2", "-g").replace("-O3", "-g")
        # Steps without "-o " must still be executed; only the display name
        # needs a fallback.
        _, has_output, after_flag = command.partition("-o ")
        name = after_flag.split(" ")[0] if has_output else command
        rc.append((name, command))
    return rc
def main() -> None:
    """Rebuild the out-of-date parts of ``torch_python`` with debug info.

    Parses the command line, verifies the environment (not Windows, torch
    imported from this checkout, ninja build tree present), touches the files
    given on the command line, runs every command of the build plan through
    ``sh`` inside BUILD_DIR, and finally links the built libraries into
    torch/lib.

    Exits through ``sys.exit`` with -95 on Windows and with -1 when the
    environment is unsuitable, a given file does not exist, or more than 100
    build steps would be needed.

    Raises:
        subprocess.CalledProcessError: if one of the build commands fails.
    """
    # Parse first so that `--help` and usage errors are reported immediately,
    # without running the environment checks below.
    args = parse_args()
    if sys.platform == "win32":
        print("Not supported on Windows yet")
        sys.exit(-95)
    if not is_devel_setup():
        print(
            "Not a devel setup of PyTorch, please run `python3 setup.py develop --user` first"
        )
        sys.exit(-1)
    if not has_build_ninja():
        print("Only ninja build system is supported at the moment")
        sys.exit(-1)
    # Tolerate None entries in `files` (possible when no file is given).
    sources = [Path(file) for file in args.files if file is not None]
    missing = [str(src) for src in sources if not src.exists()]
    if missing:
        # Path.touch() would silently create an empty file for a mistyped name.
        print(f"File(s) not found: {', '.join(missing)}")
        sys.exit(-1)
    for src in sources:
        # A fresh modification time makes ninja consider the file out of date.
        src.touch()
    build_plan = create_build_plan()
    if not build_plan:
        print("Nothing to do")
        return
    if len(build_plan) > 100:
        print("More than 100 items needs to be rebuild, run `ninja torch_python` first")
        sys.exit(-1)
    total = len(build_plan)
    for idx, (name, cmd) in enumerate(build_plan, start=1):
        print(f"[{idx} / {total}] Building {name}")
        if args.verbose:
            print(cmd)
        subprocess.check_call(["sh", "-c", cmd], cwd=BUILD_DIR)
    create_symlinks()
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()