Fix the missing device in _memory_profiler (#119751)

Fixes #119722.
1. Added the missing device argument in
```
max_memory_allocated = torch.cuda.max_memory_allocated()
max_memory_reserved = torch.cuda.max_memory_reserved()
```
2. Renamed the device parameter to device_str. Based on [lines](2bda6b4cb8/torch/profiler/profiler.py (L291)), the input device is a string (device_str) for
```
self.mem_tl.export_memory_timeline_html
self.mem_tl.export_memory_timeline_raw
self.mem_tl.export_memory_timeline
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/119751
Approved by: https://github.com/aaronenyeshi
This commit is contained in:
lancerts
2024-02-15 19:11:13 +00:00
committed by PyTorch MergeBot
parent 98fd23cccc
commit 143b5f2745

View File

@ -1053,11 +1053,11 @@ class MemoryProfileTimeline:
times = [t_min if t < 0 else t for t in times]
return times, sizes
def export_memory_timeline(self, path, device) -> None:
def export_memory_timeline(self, path, device_str) -> None:
"""Saves the memory timeline as [times, sizes by category]
as a JSON formatted file to the given path for the given
device."""
times, sizes = self._coalesce_timeline(device)
times, sizes = self._coalesce_timeline(device_str)
# TODO: Write a faster serialize (orjson not available in CI)
import json
@ -1131,7 +1131,7 @@ class MemoryProfileTimeline:
json.dump(raw_events, f)
def export_memory_timeline_html(
self, path, device, figsize=(20, 12), title=None
self, path, device_str, figsize=(20, 12), title=None
) -> None:
"""Exports the memory timeline as an HTML file which contains
the memory timeline plot embedded as a PNG file."""
@ -1152,14 +1152,15 @@ class MemoryProfileTimeline:
import matplotlib.pyplot as plt
import numpy as np
mt = self._coalesce_timeline(device)
mt = self._coalesce_timeline(device_str)
times, sizes = np.array(mt[0]), np.array(mt[1])
# For this timeline, start at 0 to match Chrome traces.
t_min = min(times)
times -= t_min
stacked = np.cumsum(sizes, axis=1) / 1024**3
max_memory_allocated = torch.cuda.max_memory_allocated()
max_memory_reserved = torch.cuda.max_memory_reserved()
device = torch.device(device_str)
max_memory_allocated = torch.cuda.max_memory_allocated(device)
max_memory_reserved = torch.cuda.max_memory_reserved(device)
# Plot memory timeline as stacked data
fig = plt.figure(figsize=figsize, dpi=80)