Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Revert "Add unified memory APIs for torch.accelerator (#152932)"
This reverts commit 15f1173e5d72d6d45faba4cecd135e0160f06c6f. Reverted https://github.com/pytorch/pytorch/pull/152932 on behalf of https://github.com/jithunnair-amd because it broke ROCm periodic runs on MI300, e.g. https://github.com/pytorch/pytorch/actions/runs/16764977800/job/47470050573 ([comment](https://github.com/pytorch/pytorch/pull/138222#issuecomment-3164941815))
@@ -1,6 +1,5 @@
 #pragma once

-#include <c10/core/CachingDeviceAllocator.h>
 #include <c10/core/DeviceType.h>
 #include <c10/macros/Macros.h>

@@ -73,27 +72,6 @@ TORCH_API c10::DeviceIndex exchangeDevice(c10::DeviceIndex device_index);
 // original device index that was active before the change.
 TORCH_API c10::DeviceIndex maybeExchangeDevice(c10::DeviceIndex device_index);

-TORCH_API inline void emptyCache() {
-  const auto device_type = getAccelerator(true).value();
-  at::getDeviceAllocator(device_type)->emptyCache();
-}
-
-TORCH_API inline at::CachingDeviceAllocator::DeviceStats getDeviceStats(
-    c10::DeviceIndex device_index) {
-  const auto device_type = getAccelerator(true).value();
-  return at::getDeviceAllocator(device_type)->getDeviceStats(device_index);
-}
-
-TORCH_API inline void resetAccumulatedStats(c10::DeviceIndex device_index) {
-  const auto device_type = getAccelerator(true).value();
-  at::getDeviceAllocator(device_type)->resetAccumulatedStats(device_index);
-}
-
-TORCH_API inline void resetPeakStats(c10::DeviceIndex device_index) {
-  const auto device_type = getAccelerator(true).value();
-  at::getDeviceAllocator(device_type)->resetPeakStats(device_index);
-}
-
 } // namespace at::accelerator

 namespace at {
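For orientation: the inline C++ helpers deleted above backed the torch.accelerator memory API that the rest of this diff removes. A minimal usage sketch of that now-reverted Python API, assuming a build that still includes #152932 and an available accelerator backend:

    import torch

    # Requires a build that predates this revert; every call below was
    # removed from torch.accelerator by this commit.
    if torch.accelerator.is_available():
        acc = torch.accelerator.current_accelerator()
        idx = torch.accelerator.current_device_index()
        x = torch.empty(1024, 1024, device=acc)
        print(torch.accelerator.memory_allocated(idx))      # bytes held by tensors
        print(torch.accelerator.max_memory_allocated(idx))  # peak since program start
        del x
        torch.accelerator.empty_cache()  # release cached, unoccupied blocks
        torch.accelerator.reset_peak_memory_stats(idx)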
@@ -25,26 +25,3 @@
     synchronize
     device_index
 ```
-
-```{eval-rst}
-.. automodule:: torch.accelerator.memory
-```
-```{eval-rst}
-.. currentmodule:: torch.accelerator.memory
-```
-
-## Memory management
-
-```{eval-rst}
-.. autosummary::
-    :toctree: generated
-    :nosignatures:
-
-    empty_cache
-    max_memory_allocated
-    max_memory_reserved
-    memory_allocated
-    memory_reserved
-    memory_stats
-    reset_accumulated_memory_stats
-    reset_peak_memory_stats
-```

@@ -2435,11 +2435,6 @@ def _accelerator_synchronizeDevice(device_index: _int) -> None: ...
 def _accelerator_exchangeDevice(device_index: _int) -> _int: ...
 def _accelerator_maybeExchangeDevice(device_index: _int) -> _int: ...
 def _accelerator_setAllocatorSettings(env: str) -> None: ...
-def _accelerator_isAllocatorInitialized() -> _bool: ...
-def _accelerator_emptyCache() -> None: ...
-def _accelerator_getDeviceStats(device_index: _int) -> dict[str, Any]: ...
-def _accelerator_resetAccumulatedStats(device_index: _int) -> None: ...
-def _accelerator_resetPeakStats(device_index: _int) -> None: ...

 # Defined in torch/csrc/jit/python/python_tracer.cpp
 class TracingState:
@@ -8,16 +8,6 @@ from typing_extensions import deprecated
 import torch

 from ._utils import _device_t, _get_device_index
-from .memory import (
-    empty_cache,
-    max_memory_allocated,
-    max_memory_reserved,
-    memory_allocated,
-    memory_reserved,
-    memory_stats,
-    reset_accumulated_memory_stats,
-    reset_peak_memory_stats,
-)


 __all__ = [
@@ -25,17 +15,9 @@ __all__ = [
     "current_device_idx",  # deprecated
     "current_device_index",
     "current_stream",
-    "empty_cache",
     "device_count",
     "device_index",
     "is_available",
-    "max_memory_allocated",
-    "max_memory_reserved",
-    "memory_allocated",
-    "memory_reserved",
-    "memory_stats",
-    "reset_accumulated_memory_stats",
-    "reset_peak_memory_stats",
     "set_device_idx",  # deprecated
     "set_device_index",
     "set_stream",
@ -1,201 +0,0 @@
|
|||||||
from collections import OrderedDict
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from ._utils import _device_t, _get_device_index
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"empty_cache",
|
|
||||||
"max_memory_allocated",
|
|
||||||
"max_memory_reserved",
|
|
||||||
"memory_allocated",
|
|
||||||
"memory_reserved",
|
|
||||||
"memory_stats",
|
|
||||||
"reset_accumulated_memory_stats",
|
|
||||||
"reset_peak_memory_stats",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def empty_cache() -> None:
|
|
||||||
r"""Release all unoccupied cached memory currently held by the caching
|
|
||||||
allocator so that those can be used in other application.
|
|
||||||
|
|
||||||
.. note:: This function is a no-op if the memory allocator for the current
|
|
||||||
:ref:`accelerator <accelerators>` has not been initialized.
|
|
||||||
"""
|
|
||||||
if not torch._C._accelerator_isAllocatorInitialized():
|
|
||||||
return
|
|
||||||
torch._C._accelerator_emptyCache()
|
|
||||||
|
|
||||||
|
|
||||||
def memory_stats(device_index: _device_t = None, /) -> OrderedDict[str, Any]:
|
|
||||||
r"""Return a dictionary of accelerator device memory allocator statistics for a given device index.
|
|
||||||
|
|
||||||
The return value of this function is a dictionary of statistics, each of
|
|
||||||
which is a non-negative integer.
|
|
||||||
|
|
||||||
Core statistics:
|
|
||||||
|
|
||||||
- ``"allocated.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
number of allocation requests received by the memory allocator.
|
|
||||||
- ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
amount of allocated memory.
|
|
||||||
- ``"segment.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
number of reserved segments from device memory allocation.
|
|
||||||
- ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
amount of reserved memory.
|
|
||||||
- ``"active.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
number of active memory blocks.
|
|
||||||
- ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
amount of active memory.
|
|
||||||
- ``"inactive_split.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
number of inactive, non-releasable memory blocks.
|
|
||||||
- ``"inactive_split_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
|
|
||||||
amount of inactive, non-releasable memory.
|
|
||||||
|
|
||||||
For these core statistics, values are broken down as follows.
|
|
||||||
|
|
||||||
Pool type:
|
|
||||||
|
|
||||||
- ``all``: combined statistics across all memory pools.
|
|
||||||
- ``large_pool``: statistics for the large allocation pool
|
|
||||||
(as of June 2025, for size >= 1MB allocations).
|
|
||||||
- ``small_pool``: statistics for the small allocation pool
|
|
||||||
(as of June 2025, for size < 1MB allocations).
|
|
||||||
|
|
||||||
Metric type:
|
|
||||||
|
|
||||||
- ``current``: current value of this metric.
|
|
||||||
- ``peak``: maximum value of this metric.
|
|
||||||
- ``allocated``: historical total increase in this metric.
|
|
||||||
- ``freed``: historical total decrease in this metric.
|
|
||||||
|
|
||||||
In addition to the core statistics, we also provide some simple event
|
|
||||||
counters:
|
|
||||||
|
|
||||||
- ``"num_alloc_retries"``: number of failed device memory allocation calls that
|
|
||||||
result in a cache flush and retry.
|
|
||||||
- ``"num_ooms"``: number of out-of-memory errors thrown.
|
|
||||||
- ``"num_sync_all_streams"``: number of ``synchronize_and_free_events`` calls.
|
|
||||||
- ``"num_device_alloc"``: number of device memory allocation calls.
|
|
||||||
- ``"num_device_free"``: number of device memory free calls.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
"""
|
|
||||||
if not torch._C._accelerator_isAllocatorInitialized():
|
|
||||||
return OrderedDict()
|
|
||||||
device_index = _get_device_index(device_index, optional=True)
|
|
||||||
stats = torch._C._accelerator_getDeviceStats(device_index)
|
|
||||||
flat_stats = []
|
|
||||||
|
|
||||||
def flatten(prefix: str, value: Any) -> None:
|
|
||||||
if isinstance(value, dict):
|
|
||||||
for k, v in value.items():
|
|
||||||
nested_prefix = f"{prefix}.{k}" if prefix else k
|
|
||||||
flatten(nested_prefix, v)
|
|
||||||
else:
|
|
||||||
flat_stats.append((prefix, value))
|
|
||||||
|
|
||||||
flatten("", stats)
|
|
||||||
flat_stats.sort()
|
|
||||||
return OrderedDict(flat_stats)
|
|
||||||
|
|
||||||
|
|
||||||
def memory_allocated(device_index: _device_t = None, /) -> int:
|
|
||||||
r"""Return the current :ref:`accelerator<accelerators>` device memory occupied by tensors
|
|
||||||
in bytes for a given device index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
"""
|
|
||||||
return memory_stats(device_index).get("allocated_bytes.all.current", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def max_memory_allocated(device_index: _device_t = None, /) -> int:
|
|
||||||
r"""Return the current :ref:`accelerator<accelerators>` maximum device memory occupied by tensors
|
|
||||||
in bytes for a given device index.
|
|
||||||
|
|
||||||
By default, this returns the peak allocated memory since the beginning of
|
|
||||||
this program. :func:`~torch.accelerator.reset_peak_memory_stats` can be used to
|
|
||||||
reset the starting point in tracking this metric.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
"""
|
|
||||||
return memory_stats(device_index).get("allocated_bytes.all.peak", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def memory_reserved(device_index: _device_t = None, /) -> int:
|
|
||||||
r"""Return the current :ref:`accelerator<accelerators>` device memory managed by the caching allocator
|
|
||||||
in bytes for a given device index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
"""
|
|
||||||
return memory_stats(device_index).get("reserved_bytes.all.current", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def max_memory_reserved(device_index: _device_t = None, /) -> int:
|
|
||||||
r"""Return the current :ref:`accelerator<accelerators>` maximum device memory managed by the caching allocator
|
|
||||||
in bytes for a given device index.
|
|
||||||
|
|
||||||
By default, this returns the peak cached memory since the beginning of this
|
|
||||||
program. :func:`~torch.accelerator.reset_peak_memory_stats` can be used to reset
|
|
||||||
the starting point in tracking this metric.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
"""
|
|
||||||
return memory_stats(device_index).get("reserved_bytes.all.peak", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def reset_accumulated_memory_stats(device_index: _device_t = None, /) -> None:
|
|
||||||
r"""Reset the "accumulated" (historical) stats tracked by the current :ref:`accelerator<accelerators>`
|
|
||||||
memory allocator for a given device index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
|
|
||||||
.. note:: This function is a no-op if the memory allocator for the current
|
|
||||||
:ref:`accelerator <accelerators>` has not been initialized.
|
|
||||||
"""
|
|
||||||
device_index = _get_device_index(device_index, optional=True)
|
|
||||||
return torch._C._accelerator_resetAccumulatedStats(device_index)
|
|
||||||
|
|
||||||
|
|
||||||
def reset_peak_memory_stats(device_index: _device_t = None, /) -> None:
|
|
||||||
r"""Reset the "peak" stats tracked by the current :ref:`accelerator<accelerators>`
|
|
||||||
memory allocator for a given device index.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
|
|
||||||
If not given, use :func:`torch.accelerator.current_device_index` by default.
|
|
||||||
If a :class:`torch.device` or str is provided, its type must match the current
|
|
||||||
:ref:`accelerator<accelerators>` device type.
|
|
||||||
|
|
||||||
.. note:: This function is a no-op if the memory allocator for the current
|
|
||||||
:ref:`accelerator <accelerators>` has not been initialized.
|
|
||||||
"""
|
|
||||||
device_index = _get_device_index(device_index, optional=True)
|
|
||||||
return torch._C._accelerator_resetPeakStats(device_index)
|
|
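The flatten helper in the deleted torch.accelerator.memory module above is what turns the nested per-pool dict returned by torch._C._accelerator_getDeviceStats into the flat dotted keys ("allocated_bytes.all.current", ...) that the convenience wrappers index. A self-contained sketch of that transformation over hypothetical sample data:

    from collections import OrderedDict
    from typing import Any

    # Hypothetical sample mirroring the nested shape the C++ binding produced.
    sample = {
        "num_ooms": 0,
        "allocated_bytes": {
            "all": {"current": 512, "peak": 2048, "allocated": 4096, "freed": 3584},
        },
    }

    flat_stats: list[tuple[str, Any]] = []

    def flatten(prefix: str, value: Any) -> None:
        # Recurse into dicts, joining keys with "."; collect leaves as pairs.
        if isinstance(value, dict):
            for k, v in value.items():
                flatten(f"{prefix}.{k}" if prefix else k, v)
        else:
            flat_stats.append((prefix, value))

    flatten("", sample)
    flat_stats.sort()
    print(OrderedDict(flat_stats))
    # First keys: 'allocated_bytes.all.allocated', 'allocated_bytes.all.current', ...
    # memory_allocated() then reduces to .get("allocated_bytes.all.current", 0).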
@@ -77,70 +77,6 @@ void initModule(PyObject* module) {
   m.def("_accelerator_setAllocatorSettings", [](std::string env) {
     c10::CachingAllocator::setAllocatorSettings(env);
   });
-
-  m.def("_accelerator_isAllocatorInitialized", []() {
-    const auto device_type = at::accelerator::getAccelerator(true).value();
-    return at::getDeviceAllocator(device_type)->initialized();
-  });
-
-  m.def("_accelerator_emptyCache", []() { at::accelerator::emptyCache(); });
-
-  m.def("_accelerator_getDeviceStats", [](c10::DeviceIndex device_index) {
-    using c10::CachingAllocator::Stat;
-    using c10::CachingAllocator::StatArray;
-    using c10::CachingAllocator::StatType;
-    using c10::CachingDeviceAllocator::DeviceStats;
-
-    const auto stats = at::accelerator::getDeviceStats(device_index);
-    const auto stat_to_dict = [](const Stat& stat) -> py::dict {
-      py::dict dict;
-      dict["current"] = stat.current;
-      dict["peak"] = stat.peak;
-      dict["allocated"] = stat.allocated;
-      dict["freed"] = stat.freed;
-      return dict;
-    };
-
-    const auto stat_array_to_dict = [=](const StatArray& stats) -> py::dict {
-      const std::array<const char*, static_cast<size_t>(StatType::NUM_TYPES)>
-          kStatTypeNames = {"all", "small_pool", "large_pool"};
-      py::dict dict;
-      for (const auto i : c10::irange(kStatTypeNames.size())) {
-        dict[kStatTypeNames[i]] = stat_to_dict(stats[i]);
-      }
-      return dict;
-    };
-
-    py::dict result;
-    result["num_alloc_retries"] = stats.num_alloc_retries;
-    result["num_ooms"] = stats.num_ooms;
-    result["max_split_size"] = stats.max_split_size;
-    result["num_sync_all_streams"] = stats.num_sync_all_streams;
-    result["num_device_alloc"] = stats.num_device_alloc;
-    result["num_device_free"] = stats.num_device_free;
-    result["allocated_bytes"] = stat_array_to_dict(stats.allocated_bytes);
-    result["reserved_bytes"] = stat_array_to_dict(stats.reserved_bytes);
-    result["active_bytes"] = stat_array_to_dict(stats.active_bytes);
-    result["requested_bytes"] = stat_array_to_dict(stats.requested_bytes);
-    result["allocation"] = stat_array_to_dict(stats.allocation);
-    result["segment"] = stat_array_to_dict(stats.segment);
-    result["active"] = stat_array_to_dict(stats.active);
-    result["inactive_split"] = stat_array_to_dict(stats.inactive_split);
-    result["inactive_split_bytes"] =
-        stat_array_to_dict(stats.inactive_split_bytes);
-    result["oversize_allocations"] = stat_to_dict(stats.oversize_allocations);
-    result["oversize_segments"] = stat_to_dict(stats.oversize_segments);
-    return result;
-  });
-
-  m.def(
-      "_accelerator_resetAccumulatedStats", [](c10::DeviceIndex device_index) {
-        at::accelerator::resetAccumulatedStats(device_index);
-      });
-
-  m.def("_accelerator_resetPeakStats", [](c10::DeviceIndex device_index) {
-    at::accelerator::resetPeakStats(device_index);
-  });
 }

 } // namespace torch::accelerator
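For reference, the deleted _accelerator_getDeviceStats binding above handed Python a two-level dict: top-level event counters plus one {pool: {metric: int}} table per core statistic, with pools named by kStatTypeNames. A sketch of that shape with placeholder values (not real allocator output):

    # Illustrative shape only; every value below is a placeholder.
    pools = ("all", "small_pool", "large_pool")
    zero_stat = {"current": 0, "peak": 0, "allocated": 0, "freed": 0}

    device_stats = {
        "num_alloc_retries": 0,
        "num_ooms": 0,
        "num_device_alloc": 0,
        "num_device_free": 0,
        "allocated_bytes": {pool: dict(zero_stat) for pool in pools},
        "reserved_bytes": {pool: dict(zero_stat) for pool in pools},
        # ... active_bytes, requested_bytes, allocation, segment, active,
        # inactive_split, and inactive_split_bytes follow the same layout;
        # oversize_allocations / oversize_segments are single stat dicts.
        "oversize_allocations": dict(zero_stat),
    }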
@@ -255,9 +255,9 @@ def memory_stats(device: "Device" = None) -> dict[str, Any]:

     - ``all``: combined statistics across all memory pools.
     - ``large_pool``: statistics for the large allocation pool
-      (as of June 2025, for size >= 1MB allocations).
+      (as of October 2019, for size >= 1MB allocations).
     - ``small_pool``: statistics for the small allocation pool
-      (as of June 2025, for size < 1MB allocations).
+      (as of October 2019, for size < 1MB allocations).

     Metric type:
