mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Use posix_fallocate() to reserve disk space for shared memory (#161910)
Shared memory is allocated by creating a file in /dev/shm (by default), which can run out of space. PyTorch reserves the file size by calling ftruncate(), which creates a sparse file, so the call succeeds even if sufficient disk space is not available. This can lead to a situation where a shared memory region is created successfully but a subsequent access to one of its pages results in SIGBUS because the disk is full. Calling posix_fallocate() after ftruncate() eliminates this problem because posix_fallocate() always allocates the space and returns an error if the disk is full. Related to https://github.com/pytorch/pytorch/issues/5040. Pull Request resolved: https://github.com/pytorch/pytorch/pull/161910. Approved by: https://github.com/mikaylagawarecki
This commit is contained in:
committed by
PyTorch MergeBot
parent
4661200125
commit
22b1710252
@ -605,6 +605,11 @@ if(UNIX)
|
||||
if(HAVE_MALLOC_USABLE_SIZE)
|
||||
add_definitions(-DHAVE_MALLOC_USABLE_SIZE=1)
|
||||
endif(HAVE_MALLOC_USABLE_SIZE)
|
||||
set(CMAKE_EXTRA_INCLUDE_FILES "fcntl.h")
|
||||
CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
|
||||
if(HAVE_POSIX_FALLOCATE)
|
||||
add_definitions(-DHAVE_POSIX_FALLOCATE=1)
|
||||
endif(HAVE_POSIX_FALLOCATE)
|
||||
endif(UNIX)
|
||||
|
||||
ADD_DEFINITIONS(-DUSE_EXTERNAL_MZCRC)
|
||||
|
@ -292,6 +292,28 @@ MapAllocator::MapAllocator(WithFd, std::string_view filename, int fd, int flags,
|
||||
if (ftruncate(fd, static_cast<off_t>(size)) == -1) {
|
||||
TORCH_CHECK(false, "unable to resize file <", filename_, "> to the right size: ", c10::utils::str_error(errno), " (", errno, ")");
|
||||
}
|
||||
|
||||
#ifdef HAVE_POSIX_FALLOCATE
// For shared-memory mappings, reserve the backing storage up front so that a
// full filesystem is reported here instead of surfacing later as SIGBUS on
// the first access to an unbacked page.
if (flags_ & ALLOCATOR_MAPPED_SHAREDMEM) {
  for (;;) {
    // posix_fallocate() returns the error number directly and does NOT set
    // errno, so every decision below must use the return value.
    const int alloc_err = posix_fallocate(fd, 0, static_cast<off_t>(size));
    if (alloc_err == 0) {
      break;
    }

    if (alloc_err == EINTR) {
      // interrupted by a signal: retry the allocation
      continue;
    }

    if (alloc_err == EINVAL || alloc_err == EOPNOTSUPP) {
      // the underlying filesystem does not support the operation
      break;
    }

    TORCH_CHECK(false, "unable to allocate shared memory(shm) for file <", filename_, ">: ", c10::utils::str_error(alloc_err), " (", alloc_err, ")");
  }
}
#endif
|
||||
|
||||
if (fstat(fd, &file_stat) == -1 || file_stat.st_size < static_cast<int64_t>(size)) {
|
||||
#ifndef STRIP_ERROR_MESSAGES
|
||||
int last_err = errno;
|
||||
|
@ -1541,6 +1541,11 @@ if(NOT INTERN_BUILD_MOBILE)
|
||||
if(HAVE_MALLOC_USABLE_SIZE)
|
||||
add_definitions(-DHAVE_MALLOC_USABLE_SIZE=1)
|
||||
endif(HAVE_MALLOC_USABLE_SIZE)
|
||||
set(CMAKE_EXTRA_INCLUDE_FILES "fcntl.h")
|
||||
CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
|
||||
if(HAVE_POSIX_FALLOCATE)
|
||||
add_definitions(-DHAVE_POSIX_FALLOCATE=1)
|
||||
endif(HAVE_POSIX_FALLOCATE)
|
||||
endif(UNIX)
|
||||
|
||||
add_definitions(-DUSE_EXTERNAL_MZCRC)
|
||||
|
Reference in New Issue
Block a user