// Patch summary (unified diff; the line breaks of the original patch are collapsed in this copy,
// and angle-bracketed text such as include targets and template arguments appears to be missing).
//  - MTIAHooksInterface::memorySnapshot gains a const std::optional& parameter named local_path;
//    the _mtia_memorySnapshot Python binding passes std::nullopt.
//  - export_memory_history on PythonMemoryTracerBase, PythonMemoryTracer and NoOpMemoryPythonTracer
//    now takes const std::string& instead of a by-value const std::string.
//  - Adds torch::mtia::MTIAMemoryProfiler: start/stop call recordMemoryHistory on the MTIA hooks and
//    export_memory_history calls memorySnapshot(path). initMemoryProfiler registers it through
//    registerMemoryTracer when isMTIAHooksBuilt() is true, and LibKinetoClient::init() calls it.
//  - Adds torch::json_symbolize, which builds one nlohmann::json frame list per input traceback
//    with name / filename / line keys.
//  - Build files: nlohmann-json is added to THIRD_PARTY_LIBS and to a deps list, and
//    MTIAMemoryProfiler.cpp is added to libtorch_profiler_sources.
diff --git a/aten/src/ATen/detail/MTIAHooksInterface.h b/aten/src/ATen/detail/MTIAHooksInterface.h index 16981789f684..642941cb743f 100644 --- a/aten/src/ATen/detail/MTIAHooksInterface.h +++ b/aten/src/ATen/detail/MTIAHooksInterface.h @@ -126,7 +126,7 @@ struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface { FAIL_MTIAHOOKS_FUNC(__func__); } - virtual PyObject* memorySnapshot() const { + virtual PyObject* memorySnapshot(const std::optional& local_path) const { FAIL_MTIAHOOKS_FUNC(__func__); return nullptr; } diff --git a/buckbuild.bzl b/buckbuild.bzl index 961d6185853c..747d9afec4b2 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -181,6 +181,7 @@ THIRD_PARTY_LIBS = { "pyyaml": ["//third-party/pypi/pyyaml:pyyaml", "//third_party:pyyaml"], "rt": ["//xplat/third-party/linker_lib:rt", "//third_party:rt"], "ruy": ["//third-party/ruy:ruy_xplat_lib", "//third_party:ruy_lib"], + "nlohmann-json": ["fbsource//third-party/nlohmann-json:nlohmann-json", "//third_party:nlohmann-json"], "sleef_arm": ["//third-party/sleef:sleef_arm", "//third_party:sleef_arm"], } @@ -1735,6 +1736,7 @@ def define_buck_targets( deps = [ third_party("glog"), third_party("kineto"), + third_party("nlohmann-json"), ], exported_deps = [ ":aten_cpu", diff --git a/build_variables.bzl b/build_variables.bzl index 7cac3da12100..acf363d37dbb 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -101,6 +101,7 @@ libtorch_profiler_sources = [ "torch/csrc/profiler/collection.cpp", "torch/csrc/profiler/data_flow.cpp", "torch/csrc/profiler/kineto_shim.cpp", + "torch/csrc/mtia/profiler/MTIAMemoryProfiler.cpp", "torch/csrc/profiler/kineto_client_interface.cpp", "torch/csrc/profiler/orchestration/observer.cpp", "torch/csrc/profiler/orchestration/python_tracer.cpp", diff --git a/torch/csrc/autograd/profiler_python.cpp b/torch/csrc/autograd/profiler_python.cpp index 4fdd87c44829..e74b7cd4a7e4 100644 --- a/torch/csrc/autograd/profiler_python.cpp +++ b/torch/csrc/autograd/profiler_python.cpp @@ -1162,7 
+1162,7 @@ class PythonMemoryTracer final : public python_tracer::PythonMemoryTracerBase { ~PythonMemoryTracer() override = default; void start() override; void stop() override; - void export_memory_history(const std::string path) override; + void export_memory_history(const std::string& path) override; }; static void toggle_memory_tracing(bool enable) { @@ -1196,7 +1196,7 @@ void PythonMemoryTracer::start() { toggle_memory_tracing(true); } -void PythonMemoryTracer::export_memory_history(const std::string path) { +void PythonMemoryTracer::export_memory_history(const std::string& path) { pybind11::gil_scoped_acquire gil; THPObjectPtr torch_cuda_memory_module( PyImport_ImportModule("torch.cuda.memory")); diff --git a/torch/csrc/mtia/Module.cpp b/torch/csrc/mtia/Module.cpp index 1ea6c6396f17..ee71866e9bd9 100644 --- a/torch/csrc/mtia/Module.cpp +++ b/torch/csrc/mtia/Module.cpp @@ -96,7 +96,8 @@ void initModule(PyObject* module) { }); m.def("_mtia_memorySnapshot", []() { - PyObject* raw_pyobject = at::detail::getMTIAHooks().memorySnapshot(); + PyObject* raw_pyobject = + at::detail::getMTIAHooks().memorySnapshot(std::nullopt); return py::reinterpret_steal(raw_pyobject); }); diff --git a/torch/csrc/mtia/profiler/MTIAMemoryProfiler.cpp b/torch/csrc/mtia/profiler/MTIAMemoryProfiler.cpp new file mode 100644 index 000000000000..4ecc4c9bcf60 --- /dev/null +++ b/torch/csrc/mtia/profiler/MTIAMemoryProfiler.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +using json = nlohmann::json; + +namespace torch::mtia { + +void MTIAMemoryProfiler::start() { + at::detail::getMTIAHooks().recordMemoryHistory("all", "all", 150000);/* NOTE(review): 150000 is an unexplained literal; presumably a cap on recorded entries. TODO confirm against recordMemoryHistory. */ +} + +void MTIAMemoryProfiler::export_memory_history(const std::string& path) { + at::detail::getMTIAHooks().memorySnapshot(path);/* NOTE(review): the PyObject* result is discarded. The _mtia_memorySnapshot binding uses reinterpret_steal on it, which suggests an owned reference, so dropping it here may leak. TODO confirm. */ + return; +} + +void MTIAMemoryProfiler::stop() { + at::detail::getMTIAHooks().recordMemoryHistory(std::nullopt, "all", 0); +} + +std::unique_ptr +getMemoryTracer() { + return std::make_unique(); +} + +void 
initMemoryProfiler() { + if (at::detail::isMTIAHooksBuilt()) { + fprintf(stderr, "Initializing MTIA Memory Tracer\n");/* NOTE(review): prints to stderr every time this branch runs; consider routing through the project logging macros. */ + torch::profiler::impl::python_tracer::registerMemoryTracer( + &getMemoryTracer); + } +} +} // namespace torch::mtia diff --git a/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h b/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h new file mode 100644 index 000000000000..8ce22f2af780 --- /dev/null +++ b/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h @@ -0,0 +1,20 @@ +#pragma once +#include + +namespace torch::mtia { +using namespace torch::profiler::impl::python_tracer;/* NOTE(review): using-directive at namespace scope in a header; every includer gets these names in torch::mtia. */ + +void initMemoryProfiler(); + +std::unique_ptr getMemoryTracer(); + +class MTIAMemoryProfiler final : public PythonMemoryTracerBase { + public: + explicit MTIAMemoryProfiler() = default; + ~MTIAMemoryProfiler() override = default; + void start() override; + void stop() override; + void export_memory_history(const std::string& path) override; +}; + +} // namespace torch::mtia diff --git a/torch/csrc/profiler/kineto_client_interface.cpp b/torch/csrc/profiler/kineto_client_interface.cpp index 89c824cd578f..bb805ad3d72c 100644 --- a/torch/csrc/profiler/kineto_client_interface.cpp +++ b/torch/csrc/profiler/kineto_client_interface.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,9 @@ using namespace torch::autograd::profiler; class LibKinetoClient : public libkineto::ClientInterface { public: - void init() override {} + void init() override { + ::torch::mtia::initMemoryProfiler(); + } void prepare( bool report_input_shapes = false, diff --git a/torch/csrc/profiler/orchestration/python_tracer.cpp b/torch/csrc/profiler/orchestration/python_tracer.cpp index 73bdf3ccb017..d5d120d376f2 100644 --- a/torch/csrc/profiler/orchestration/python_tracer.cpp +++ b/torch/csrc/profiler/orchestration/python_tracer.cpp @@ -24,7 +24,7 @@ struct NoOpMemoryPythonTracer : public PythonMemoryTracerBase { ~NoOpMemoryPythonTracer() override = default; void start() 
override {} void stop() override {} - void export_memory_history(const std::string path) override {} + void export_memory_history(const std::string&) override {} }; } // namespace diff --git a/torch/csrc/profiler/orchestration/python_tracer.h b/torch/csrc/profiler/orchestration/python_tracer.h index 725c6d8a5c95..52387e92e562 100644 --- a/torch/csrc/profiler/orchestration/python_tracer.h +++ b/torch/csrc/profiler/orchestration/python_tracer.h @@ -66,7 +66,7 @@ struct TORCH_API PythonMemoryTracerBase { virtual void start() = 0; virtual void stop() = 0; - virtual void export_memory_history(const std::string path) = 0; + virtual void export_memory_history(const std::string& path) = 0; }; using MakeMemoryFn = std::unique_ptr (*)(); diff --git a/torch/csrc/profiler/python/combined_traceback.cpp b/torch/csrc/profiler/python/combined_traceback.cpp index f9e20541ed86..fc1269ed3498 100644 --- a/torch/csrc/profiler/python/combined_traceback.cpp +++ b/torch/csrc/profiler/python/combined_traceback.cpp @@ -115,6 +115,49 @@ struct PythonTraceback : public CapturedTraceback::Python { } // namespace +std::vector json_symbolize( + std::vector& to_symbolize) { + std::unordered_map cached_frames; + std::vector unique_frames; + for (const auto& sc : to_symbolize) { + auto it = cached_frames.find(sc); + if (it == cached_frames.end()) { + cached_frames.try_emplace(sc, unique_frames.size()); + unique_frames.push_back(sc); + } + } + auto s = symbolize(unique_frames); + + std::string line_s = "line"; + std::string name_s = "name"; + std::string filename_s = "filename"; + std::vector all_frames; + + for (const auto& f : s.all_frames) { + nlohmann::json d; + d[name_s] = f.funcname; + d[filename_s] = f.filename; + d[line_s] = f.lineno; + all_frames.emplace_back(std::move(d)); + } + + std::vector py_unique_frames; + for (const auto& t : s.tracebacks) { + nlohmann::json l;/* NOTE(review): l starts as JSON null and emplace_back converts null to an array, so a traceback with no frames would stay null instead of becoming an empty array. TODO confirm intended. */ + for (const auto& e : t) { + l.emplace_back(all_frames.at(e)); + } + py_unique_frames.push_back(std::move(l)); + } + + 
std::vector result; + result.reserve(to_symbolize.size()); + for (const auto& sc : to_symbolize) { + result.push_back(py_unique_frames.at(cached_frames.at(sc))); + } + return result; +} + std::vector py_symbolize( std::vector& to_symbolize) { // we dedup repeated to_symbolize objects to prevent diff --git a/torch/csrc/profiler/python/combined_traceback.h b/torch/csrc/profiler/python/combined_traceback.h index 03b3846822de..7e1f76b5c0c8 100644 --- a/torch/csrc/profiler/python/combined_traceback.h +++ b/torch/csrc/profiler/python/combined_traceback.h @@ -1,5 +1,6 @@ #include +#include #include #include @@ -14,6 +15,10 @@ namespace torch { TORCH_API std::vector py_symbolize( std::vector& to_symbolize); +// Return the symbolized tracebacks in json format so that they can be used within cpp +TORCH_API std::vector json_symbolize( + std::vector& to_symbolize); + // requires GIL to be held, frees any pending free frames TORCH_PYTHON_API void freeDeadCapturedTracebackFrames();