Use posix_fallocate() to reserve disk space for shared memory (#161910)

Shared memory is allocated by creating a file in /dev/shm (by default) that can run out of space. Pytorch reserves the file size by calling ftruncate() that creates a sparse file, so it succeeds even if sufficient disk space is not available.

This could lead to a situation when a shared memory region is successfully created but a subsequent access to a shared memory page results in SIGBUS due to the disk being full.

Using posix_fallocate() instead of ftruncate() eliminates this problem because the former syscall always allocates space and it returns an error if the disk is full.

Related to https://github.com/pytorch/pytorch/issues/5040
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161910
Approved by: https://github.com/mikaylagawarecki
This commit is contained in:
Jian Wen
2025-10-02 19:12:57 +00:00
committed by PyTorch MergeBot
parent 4661200125
commit 22b1710252
3 changed files with 32 additions and 0 deletions

View File

@ -605,6 +605,11 @@ if(UNIX)
if(HAVE_MALLOC_USABLE_SIZE)
add_definitions(-DHAVE_MALLOC_USABLE_SIZE=1)
endif(HAVE_MALLOC_USABLE_SIZE)
set(CMAKE_EXTRA_INCLUDE_FILES "fcntl.h")
CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
if(HAVE_POSIX_FALLOCATE)
add_definitions(-DHAVE_POSIX_FALLOCATE=1)
endif(HAVE_POSIX_FALLOCATE)
endif(UNIX)
ADD_DEFINITIONS(-DUSE_EXTERNAL_MZCRC)

View File

@ -292,6 +292,28 @@ MapAllocator::MapAllocator(WithFd, std::string_view filename, int fd, int flags,
if (ftruncate(fd, static_cast<off_t>(size)) == -1) {
TORCH_CHECK(false, "unable to resize file <", filename_, "> to the right size: ", c10::utils::str_error(errno), " (", errno, ")");
}
#ifdef HAVE_POSIX_FALLOCATE
if (flags_ & ALLOCATOR_MAPPED_SHAREDMEM) {
for (;;) {
if (posix_fallocate(fd, 0, static_cast<off_t>(size)) == 0) {
break;
}
if (errno == EINTR) {
continue;
}
if (errno == EINVAL || errno == EOPNOTSUPP) {
// the underlying filesystem does not support the operation
break;
}
TORCH_CHECK(false, "unable to allocate shared memory(shm) for file <", filename_, ">: ", c10::utils::str_error(errno), " (", errno, ")");
}
}
#endif
if (fstat(fd, &file_stat) == -1 || file_stat.st_size < static_cast<int64_t>(size)) {
#ifndef STRIP_ERROR_MESSAGES
int last_err = errno;

View File

@ -1541,6 +1541,11 @@ if(NOT INTERN_BUILD_MOBILE)
if(HAVE_MALLOC_USABLE_SIZE)
add_definitions(-DHAVE_MALLOC_USABLE_SIZE=1)
endif(HAVE_MALLOC_USABLE_SIZE)
set(CMAKE_EXTRA_INCLUDE_FILES "fcntl.h")
CHECK_FUNCTION_EXISTS(posix_fallocate HAVE_POSIX_FALLOCATE)
if(HAVE_POSIX_FALLOCATE)
add_definitions(-DHAVE_POSIX_FALLOCATE=1)
endif(HAVE_POSIX_FALLOCATE)
endif(UNIX)
add_definitions(-DUSE_EXTERNAL_MZCRC)