Fix the missing device in _memory_profiler (#119751)

Fixes #119722.
1. Added the missing device argument in
```
max_memory_allocated = torch.cuda.max_memory_allocated()
max_memory_reserved = torch.cuda.max_memory_reserved()
```
2. Renamed the device parameter to device_str. Based on [lines](2bda6b4cb8/torch/profiler/profiler.py (L291)), the input device is a string (device_str) for
```
self.mem_tl.export_memory_timeline_html
self.mem_tl.export_memory_timeline_raw
self.mem_tl.export_memory_timeline
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/119751
Approved by: https://github.com/aaronenyeshi
This commit is contained in:
lancerts
2024-02-15 19:11:13 +00:00
committed by PyTorch MergeBot
parent 98fd23cccc
commit 143b5f2745

View File

@ -1053,11 +1053,11 @@ class MemoryProfileTimeline:
times = [t_min if t < 0 else t for t in times]
return times, sizes
def export_memory_timeline(self, path, device) -> None:
def export_memory_timeline(self, path, device_str) -> None:
"""Saves the memory timeline as [times, sizes by category]
as a JSON formatted file to the given path for the given
device."""
times, sizes = self._coalesce_timeline(device)
times, sizes = self._coalesce_timeline(device_str)
# TODO: Write a faster serialize (orjson not available in CI)
import json
@ -1131,7 +1131,7 @@ class MemoryProfileTimeline:
json.dump(raw_events, f)
def export_memory_timeline_html(
self, path, device, figsize=(20, 12), title=None
self, path, device_str, figsize=(20, 12), title=None
) -> None:
"""Exports the memory timeline as an HTML file which contains
the memory timeline plot embedded as a PNG file."""
@ -1152,14 +1152,15 @@ class MemoryProfileTimeline:
import matplotlib.pyplot as plt
import numpy as np
mt = self._coalesce_timeline(device)
mt = self._coalesce_timeline(device_str)
times, sizes = np.array(mt[0]), np.array(mt[1])
# For this timeline, start at 0 to match Chrome traces.
t_min = min(times)
times -= t_min
stacked = np.cumsum(sizes, axis=1) / 1024**3
max_memory_allocated = torch.cuda.max_memory_allocated()
max_memory_reserved = torch.cuda.max_memory_reserved()
device = torch.device(device_str)
max_memory_allocated = torch.cuda.max_memory_allocated(device)
max_memory_reserved = torch.cuda.max_memory_reserved(device)
# Plot memory timeline as stacked data
fig = plt.figure(figsize=figsize, dpi=80)