mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-22 06:11:27 +08:00
Fix the missing device in _memory_profiler (#119751)
Fixes #119722.
1. Added the missing device argument in
```
max_memory_allocated = torch.cuda.max_memory_allocated()
max_memory_reserved = torch.cuda.max_memory_reserved()
```
2. Renamed the device parameter to device_str. Based on [these lines](2bda6b4cb8/torch/profiler/profiler.py (L291)
), the input device is a string (device_str) for
```
self.mem_tl.export_memory_timeline_html
self.mem_tl.export_memory_timeline_raw
self.mem_tl.export_memory_timeline
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/119751
Approved by: https://github.com/aaronenyeshi
This commit is contained in:
committed by
PyTorch MergeBot
parent
98fd23cccc
commit
143b5f2745
@ -1053,11 +1053,11 @@ class MemoryProfileTimeline:
|
||||
times = [t_min if t < 0 else t for t in times]
|
||||
return times, sizes
|
||||
|
||||
def export_memory_timeline(self, path, device) -> None:
|
||||
def export_memory_timeline(self, path, device_str) -> None:
|
||||
"""Saves the memory timeline as [times, sizes by category]
|
||||
as a JSON formatted file to the given path for the given
|
||||
device."""
|
||||
times, sizes = self._coalesce_timeline(device)
|
||||
times, sizes = self._coalesce_timeline(device_str)
|
||||
# TODO: Write a faster serialize (orjson not available in CI)
|
||||
import json
|
||||
|
||||
@ -1131,7 +1131,7 @@ class MemoryProfileTimeline:
|
||||
json.dump(raw_events, f)
|
||||
|
||||
def export_memory_timeline_html(
|
||||
self, path, device, figsize=(20, 12), title=None
|
||||
self, path, device_str, figsize=(20, 12), title=None
|
||||
) -> None:
|
||||
"""Exports the memory timeline as an HTML file which contains
|
||||
the memory timeline plot embedded as a PNG file."""
|
||||
@ -1152,14 +1152,15 @@ class MemoryProfileTimeline:
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
mt = self._coalesce_timeline(device)
|
||||
mt = self._coalesce_timeline(device_str)
|
||||
times, sizes = np.array(mt[0]), np.array(mt[1])
|
||||
# For this timeline, start at 0 to match Chrome traces.
|
||||
t_min = min(times)
|
||||
times -= t_min
|
||||
stacked = np.cumsum(sizes, axis=1) / 1024**3
|
||||
max_memory_allocated = torch.cuda.max_memory_allocated()
|
||||
max_memory_reserved = torch.cuda.max_memory_reserved()
|
||||
device = torch.device(device_str)
|
||||
max_memory_allocated = torch.cuda.max_memory_allocated(device)
|
||||
max_memory_reserved = torch.cuda.max_memory_reserved(device)
|
||||
|
||||
# Plot memory timeline as stacked data
|
||||
fig = plt.figure(figsize=figsize, dpi=80)
|
||||
|
Reference in New Issue
Block a user