Add option to record C++ backtraces in _record_memory_history (#86145)

I used this to debug https://github.com/pytorch/pytorch/issues/86136, so it has already proven useful. Capturing C++ backtraces is slow, so the option is not enabled by default.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/86145
Approved by: https://github.com/albanD, https://github.com/zdevito
This commit is contained in:
Edward Z. Yang
2022-10-03 13:56:53 -07:00
committed by PyTorch MergeBot
parent 97d6b5bbf8
commit adf5919720
4 changed files with 58 additions and 6 deletions

View File

@ -536,13 +536,15 @@ struct Frame {
struct StackContext : public c10::cuda::CUDACachingAllocator::Context {
std::vector<Frame> frames;
// Empty if cpp traces weren't enabled
std::string cpp_frames;
// Drops one Python reference on the code object of every recorded frame
// (presumably the reference taken when the frames were gathered -- that code
// is not visible in this hunk). The GIL is acquired first because Python
// reference counts are modified here.
~StackContext() {
  py::gil_scoped_acquire gil;
  for (auto& frame : frames) {
    // Py_XDECREF accepts a null pointer, so an unset code object is a no-op.
    Py_XDECREF(reinterpret_cast<PyObject*>(frame.code));
  }
}
static std::unique_ptr<c10::cuda::CUDACachingAllocator::Context> gather() {
static std::unique_ptr<StackContext> _gather() {
py::gil_scoped_acquire acquire;
auto r = std::make_unique<StackContext>();
PyFrameObject* f = PyEval_GetFrame();
@ -555,6 +557,15 @@ struct StackContext : public c10::cuda::CUDACachingAllocator::Context {
}
return r;
}
static std::unique_ptr<c10::cuda::CUDACachingAllocator::Context> gather() {
return _gather();
}
// Context recorder variant that additionally stores the string returned by
// c10::get_backtrace() in cpp_frames, on top of what _gather() collects.
static std::unique_ptr<c10::cuda::CUDACachingAllocator::Context>
gather_with_cpp() {
  auto ctx = _gather();
  ctx->cpp_frames = c10::get_backtrace();
  // The local's pointer type differs from the declared return type, so it is
  // moved explicitly into the converting unique_ptr constructor.
  return std::move(ctx);
}
};
PyObject* THCPModule_memorySnapshot(PyObject* _unused, PyObject* noargs) {
@ -584,6 +595,7 @@ PyObject* THCPModule_memorySnapshot(PyObject* _unused, PyObject* noargs) {
py::str name_s = "name";
py::str line_s = "line";
py::str frames_s = "frames";
py::str cpp_frames_s = "cpp_frames";
py::str history_s = "history";
py::str blocks_s = "blocks";
@ -626,6 +638,9 @@ PyObject* THCPModule_memorySnapshot(PyObject* _unused, PyObject* noargs) {
frame[line_s] = PyCode_Addr2Line(f.code, f.lasti);
frames.append(std::move(frame));
}
if (!sc->cpp_frames.empty()) {
history_entry[cpp_frames_s] = py::cast(sc->cpp_frames);
}
history_entry[frames_s] = std::move(frames);
}
h = h->next.get();
@ -725,9 +740,10 @@ static void registerCudaDeviceProperties(PyObject* module) {
return stream.str();
});
m.def("_cuda_recordMemoryHistory", [](bool enabled) {
m.def("_cuda_recordMemoryHistory", [](bool enabled, bool cpp) {
c10::cuda::CUDACachingAllocator::setContextRecorder(
enabled ? StackContext::gather : nullptr);
enabled ? (cpp ? StackContext::gather_with_cpp : StackContext::gather)
: nullptr);
});
}