Mirror of https://github.com/pytorch/pytorch.git · synced 2025-10-20 21:14:14 +08:00
Refine XPU allocator message when OOM (#165509)
# Motivation

Provide more information in the XPU out-of-memory error message and align it with other backends to improve the user experience.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165509
Approved by: https://github.com/EikanWang
ghstack dependencies: #165508
Committed by: PyTorch MergeBot
Parent: d7ffa8b8a2
Commit: d0c32971b4
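For context, the free-memory query this change relies on comes from the `sycl_ext_intel_free_memory` SYCL extension: a device advertises support via `sycl::aspect::ext_intel_free_memory`, and the value is read with `get_info<sycl::ext::intel::info::device::free_memory>()`. Below is a minimal standalone sketch of that query pattern (not PyTorch code; on Level Zero backends the query typically also requires running with `ZES_ENABLE_SYSMAN=1`):

```cpp
// Minimal SYCL sketch (not PyTorch code): query free device memory when the
// runtime supports the ext_intel_free_memory aspect, otherwise fall back to
// reporting only the total capacity.
#include <sycl/sycl.hpp>

#include <iostream>

int main() {
  sycl::device dev{sycl::default_selector_v};
  const auto total = dev.get_info<sycl::info::device::global_mem_size>();
  if (dev.has(sycl::aspect::ext_intel_free_memory)) {
    const auto free_bytes =
        dev.get_info<sycl::ext::intel::info::device::free_memory>();
    std::cout << "free: " << free_bytes << " / total: " << total << " bytes\n";
  } else {
    // Aspect unsupported: the runtime cannot report free memory directly.
    std::cout << "free-memory query unsupported; total: " << total
              << " bytes\n";
  }
}
```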
```diff
@@ -433,6 +433,18 @@ class DeviceCachingAllocator {
     c10::xpu::DeviceProp device_prop;
     c10::xpu::get_device_properties(&device_prop, device);
     auto device_total = device_prop.global_mem_size;
+    // Estimate the available device memory when the SYCL runtime does not
+    // support the corresponding aspect (ext_intel_free_memory).
+    size_t device_free = device_prop.global_mem_size -
+        stats.reserved_bytes[static_cast<size_t>(StatType::AGGREGATE)]
+            .current;
+    auto& raw_device = c10::xpu::get_raw_device(device);
+    // TODO: Remove the aspect check once the SYCL runtime bug is fixed on
+    // affected devices.
+    if (raw_device.has(sycl::aspect::ext_intel_free_memory)) {
+      device_free =
+          raw_device.get_info<sycl::ext::intel::info::device::free_memory>();
+    }
     auto allocated_bytes =
         stats.allocated_bytes[static_cast<size_t>(StatType::AGGREGATE)]
             .current;
```
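When the aspect is unavailable, the hunk above estimates free memory as the device's total capacity minus the bytes this allocator has reserved. A small sketch with made-up numbers illustrates the arithmetic and its main caveat: the estimate cannot see memory held by other processes, so it can overstate what is actually free.

```cpp
// Sketch of the fallback estimate from the hunk above, with made-up numbers.
#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t device_total = 16ull << 30;    // 16 GiB capacity (example)
  const std::size_t reserved_bytes = 10ull << 30;  // reserved by this process
  // total - reserved ignores other processes' usage, so this is an upper
  // bound on the memory actually available.
  const std::size_t device_free = device_total - reserved_bytes;
  std::printf("estimated free: %zu bytes (~%zu GiB)\n", device_free,
              device_free >> 30);
  return 0;
}
```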
```diff
@@ -455,7 +467,9 @@ class DeviceCachingAllocator {
         static_cast<int>(device),
         " has a total capacity of ",
         format_size(device_total),
-        ". Of the allocated memory ",
+        " of which ",
+        format_size(device_free),
+        " is free. Of the allocated memory ",
         format_size(allocated_bytes),
         " is allocated by PyTorch, and ",
         format_size(reserved_bytes - allocated_bytes),
```
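Assembled, the fragments in the second hunk produce an OOM report in the style of the CUDA allocator. The sketch below shows how the pieces might join; `format_size` is a stand-in for the allocator's helper, and the leading "GPU" prefix and trailing "reserved by PyTorch but unallocated" clause are assumptions modeled on other backends, since they fall outside the hunk:

```cpp
// Sketch: joining the message fragments from the hunk above into one string.
#include <cstddef>
#include <cstdio>
#include <sstream>
#include <string>

// Stand-in for the allocator's format_size helper (assumed behavior:
// pretty-prints a byte count, e.g. "4.00 GiB").
static std::string format_size(std::size_t bytes) {
  char buf[32];
  std::snprintf(buf, sizeof(buf), "%.2f GiB",
                static_cast<double>(bytes) / static_cast<double>(1ull << 30));
  return buf;
}

static std::string oom_message(int device, std::size_t device_total,
                               std::size_t device_free,
                               std::size_t allocated_bytes,
                               std::size_t reserved_bytes) {
  std::ostringstream ss;
  // Prefix and suffix are assumptions; the middle mirrors the hunk above.
  ss << "GPU " << device << " has a total capacity of "
     << format_size(device_total) << " of which " << format_size(device_free)
     << " is free. Of the allocated memory " << format_size(allocated_bytes)
     << " is allocated by PyTorch, and "
     << format_size(reserved_bytes - allocated_bytes)
     << " is reserved by PyTorch but unallocated.";
  return ss.str();
}

int main() {
  std::printf("%s\n", oom_message(0, 16ull << 30, 2ull << 30, 10ull << 30,
                                  14ull << 30)
                          .c_str());
  return 0;
}
```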