From 21a9a93eb4bf8660e24721bb24f69ebb0340f2fb Mon Sep 17 00:00:00 2001
From: Antonio Cuni
Date: Tue, 23 Mar 2021 12:27:51 -0700
Subject: [PATCH] gdb special command to print tensors (#54339)

Summary:
This is something which I wrote because it was useful during my debugging sessions, but I think it might be generally useful to other people as well, so I took the liberty of proposing an official `pytorch-gdb` extension.

`pytorch-gdb` is a gdb script written in Python. Currently, it contains only one command: `torch-tensor-repr`, which prints a human-readable repr of an `at::Tensor` object. Example:
```
Breakpoint 1, at::native::neg (self=...) at [...]/pytorch/aten/src/ATen/native/UnaryOps.cpp:520
520	Tensor neg(const Tensor& self) { return unary_op_impl(self, at::neg_out); }
(gdb) # the default repr of 'self' is not very useful
(gdb) p self
$1 = (const at::Tensor &) 0x7ffff72ed780: {impl_ = {target_ = 0x5555559df6e0}}
(gdb) torch-tensor-repr self
Python-level repr of self:
tensor([1., 2., 3., 4.], dtype=torch.float64)
```
The idea is that by having an official place to put these things, `pytorch-gdb` will slowly grow other useful features and make the pytorch debugging experience nicer and faster.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/54339

Reviewed By: bdhirsh

Differential Revision: D27253674

Pulled By: ezyang

fbshipit-source-id: dba219e126cc2fe66b2d26740f3a8e3b886e56f5
---
 .gdbinit                 | 14 +++++++++
 CONTRIBUTING.md          | 63 ++++++++++++++++++++++++++++++++++++++++
 tools/gdb/pytorch-gdb.py | 52 +++++++++++++++++++++++++++++++++
 torch/csrc/utils.cpp     | 56 +++++++++++++++++++++++++++++++++++
 4 files changed, 185 insertions(+)
 create mode 100644 .gdbinit
 create mode 100644 tools/gdb/pytorch-gdb.py

diff --git a/.gdbinit b/.gdbinit
new file mode 100644
index 000000000000..271486850cb8
--- /dev/null
+++ b/.gdbinit
@@ -0,0 +1,14 @@
+# automatically load the pytorch-gdb extension.
+#
+# gdb automatically tries to load this file whenever it is executed from the
+# root of the pytorch repo, but by default it is not allowed to do so for
+# security reasons. If you want to use pytorch-gdb, please add the following
+# line to your ~/.gdbinit (i.e., the .gdbinit file which is in your home
+# directory, NOT this file):
+#     add-auto-load-safe-path /path/to/pytorch/.gdbinit
+#
+# Alternatively, you can manually load the pytorch-gdb commands into your
+# existing gdb session by doing the following:
+#     (gdb) source /path/to/pytorch/tools/gdb/pytorch-gdb.py
+
+source tools/gdb/pytorch-gdb.py
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7c95483b5be7..ffea19ce6f51 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -26,6 +26,7 @@
   - [Use CCache](#use-ccache)
   - [Use a faster linker](#use-a-faster-linker)
   - [C++ frontend development tips](#c-frontend-development-tips)
+  - [GDB integration](#gdb-integration)
 - [CUDA development tips](#cuda-development-tips)
 - [Windows development tips](#windows-development-tips)
 - [Known MSVC (and MSVC with NVCC) bugs](#known-msvc-and-msvc-with-nvcc-bugs)
@@ -735,6 +736,68 @@ framework, which you can read up about to learn how to configure the test
 runner. When submitting a new feature, we care very much that you write appropriate tests.
 Please follow the lead of the other tests to see how to write a new test case.
 
+### GDB integration
+
+If you are debugging pytorch inside GDB, you might be interested in
+[pytorch-gdb](tools/gdb/pytorch-gdb.py). This script introduces some
+pytorch-specific commands which you can use from the GDB prompt. In
+particular, `torch-tensor-repr` prints a human-readable repr of an at::Tensor
+object. Example of usage:
+
+```
+$ gdb python
+GNU gdb (Ubuntu 9.2-0ubuntu1~20.04) 9.2
+[...]
+(gdb) # insert a breakpoint when we call .neg()
+(gdb) break at::native:neg
+No source file named at::native.
+Make breakpoint pending on future shared library load? (y or [n]) y
+Breakpoint 1 (at::native:neg) pending.
+
+(gdb) run
+[...]
+>>> import torch
+>>> t = torch.tensor([1, 2, 3, 4], dtype=torch.float64)
+>>> t
+tensor([1., 2., 3., 4.], dtype=torch.float64)
+>>> t.neg()
+
+Breakpoint 1, at::native::neg (self=...) at [...]/pytorch/aten/src/ATen/native/UnaryOps.cpp:520
+520	Tensor neg(const Tensor& self) { return unary_op_impl(self, at::neg_out); }
+(gdb) # the default repr of 'self' is not very useful
+(gdb) p self
+$1 = (const at::Tensor &) @0x7ffff72ed780: {impl_ = {target_ = 0x5555559df6e0}}
+(gdb) torch-tensor-repr self
+Python-level repr of self:
+tensor([1., 2., 3., 4.], dtype=torch.float64)
+```
+
+GDB tries to automatically load `pytorch-gdb` thanks to the
+[.gdbinit](.gdbinit) at the root of the pytorch repo. However, auto-loading is disabled by default for security reasons:
+
+```
+$ gdb
+warning: File "/path/to/pytorch/.gdbinit" auto-loading has been declined by your `auto-load safe-path' set to "$debugdir:$datadir/auto-load".
+To enable execution of this file add
+	add-auto-load-safe-path /path/to/pytorch/.gdbinit
+line to your configuration file "/home/YOUR-USERNAME/.gdbinit".
+To completely disable this security protection add
+	set auto-load safe-path /
+line to your configuration file "/home/YOUR-USERNAME/.gdbinit".
+For more information about this security protection see the
+"Auto-loading safe path" section in the GDB manual. E.g., run from the shell:
+	info "(gdb)Auto-loading safe path"
+(gdb)
+```
+
+As gdb itself suggests, the best way to enable auto-loading of `pytorch-gdb`
+is to add the following line to your `~/.gdbinit` (i.e., the `.gdbinit` file
+which is in your home directory, **not** `/path/to/pytorch/.gdbinit`):
+```
+add-auto-load-safe-path /path/to/pytorch/.gdbinit
+```
+
+
 ## CUDA development tips
 
 If you are working on the CUDA code, here are some useful CUDA debugging tips:
diff --git a/tools/gdb/pytorch-gdb.py b/tools/gdb/pytorch-gdb.py
new file mode 100644
index 000000000000..a3a6cf37bdaf
--- /dev/null
+++ b/tools/gdb/pytorch-gdb.py
@@ -0,0 +1,52 @@
+import gdb
+import textwrap
+
+class DisableBreakpoints:
+    """
+    Context-manager to temporarily disable all gdb breakpoints, useful if
+    there is a risk of hitting one during the evaluation of one of our custom
+    commands
+    """
+
+    def __enter__(self):
+        self.disabled_breakpoints = []
+        for b in gdb.breakpoints():
+            if b.enabled:
+                b.enabled = False
+                self.disabled_breakpoints.append(b)
+
+    def __exit__(self, etype, evalue, tb):
+        for b in self.disabled_breakpoints:
+            b.enabled = True
+
+class TensorRepr(gdb.Command):
+    """
+    Print a human-readable representation of the given at::Tensor.
+    Usage: torch-tensor-repr EXP
+
+    at::Tensor instances do not have a C++ implementation of a repr method: in
+    pytorch, this is done by pure-Python code. As such, torch-tensor-repr
+    internally creates a Python wrapper for the given tensor and calls repr()
+    on it.
+ """ + __doc__ = textwrap.dedent(__doc__).strip() + + def __init__(self): + gdb.Command.__init__(self, 'torch-tensor-repr', + gdb.COMMAND_USER, gdb.COMPLETE_EXPRESSION) + + def invoke(self, args, from_tty): + args = gdb.string_to_argv(args) + if len(args) != 1: + print('Usage: torch-tensor-repr EXP') + return + name = args[0] + with DisableBreakpoints(): + res = gdb.parse_and_eval('torch::gdb::tensor_repr(%s)' % name) + print('Python-level repr of %s:' % name) + print(res.string()) + # torch::gdb::tensor_repr returns a malloc()ed buffer, let's free it + gdb.parse_and_eval('(void)free(%s)' % int(res)) + +TensorRepr() + diff --git a/torch/csrc/utils.cpp b/torch/csrc/utils.cpp index 9b153f1533c5..51c0a7da780b 100644 --- a/torch/csrc/utils.cpp +++ b/torch/csrc/utils.cpp @@ -252,3 +252,59 @@ void THPPointer::free() { } template class THPPointer; + +namespace torch { namespace gdb { +/* ~~~ misc debugging utilities ~~~ + * + * torch::gdb::* functions are NOT meant to be called by general pytorch code, + * but only from within a gdb session. As such, utils.h does not contain any + * declaration for those. + */ + +// This is a helper needed by the torch-tensor-repr gdb command. +// Return an human-readable representation of the given Tensor. The resulting +// string is stored into a malloc()ed buffer. The caller is responsible to +// free() it. We use malloc() instead of new[] because it's much easier to +// call free than delete[] from withing gdb. +// Currently the code for computing the repr of a tensor is written in Python, +// so we need to wrap the Tensor into a Python object first. +char *tensor_repr(at::Tensor tensor) { + PyGILState_STATE gil = PyGILState_Ensure(); + PyObject *pytensor = NULL; + PyObject *repr = NULL; + Py_ssize_t bufsize; + const char *buf = NULL; + char *result = NULL; + + pytensor = THPVariable_Wrap(at::Tensor(tensor)); + if (!pytensor) + goto error; + repr = PyObject_Repr(pytensor); + if (!repr) + goto error; + buf = PyUnicode_AsUTF8AndSize(repr, &bufsize); + if (!buf) + goto error; + result = static_cast(malloc(bufsize + 1)); // account for the trailing \0 + if (!result) { + fprintf(stderr, "cannot allocate memory for the result\n"); + goto error; + } + strcpy(result, buf); + Py_XDECREF(pytensor); + Py_XDECREF(repr); + PyGILState_Release(gil); + return result; + +error: + fprintf(stderr, "torch::gdb::tensor_repr: unexpected error\n"); + if (PyErr_Occurred()) + PyErr_Print(); + Py_XDECREF(pytensor); + Py_XDECREF(repr); + free(result); + PyGILState_Release(gil); + return NULL; +} + +}} // namespace torch::gdb