[nativert] Move file_util to pytorch core (#153162)

Summary: fbcode//sigmoid/core/common -> fbcode//caffe2/torch/nativert/common

Test Plan: Github CI

Differential Revision: D74328089

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153162
Approved by: https://github.com/zhxchen17
This commit is contained in:
Georgia Phillips
2025-05-27 03:42:47 +00:00
committed by PyTorch MergeBot
parent 70d12ccc3f
commit f8010e7b93
5 changed files with 578 additions and 0 deletions

View File

@ -594,6 +594,7 @@ libtorch_nativert_sources = [
"torch/nativert/graph/TensorMeta.cpp",
"torch/nativert/executor/Placement.cpp",
"torch/nativert/executor/PlacementUtils.cpp",
"torch/nativert/common/FileUtil.cpp",
]
torch_mobile_tracer_sources = [

View File

@ -8,6 +8,7 @@ set(NATIVERT_TEST_SRCS
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
)
add_executable(test_nativert

View File

@ -0,0 +1,111 @@
#include <gtest/gtest.h>
#include <torch/nativert/common/FileUtil.h>
#include <fstream>
namespace torch {
namespace nativert {
// openNoInt() must hand back a valid descriptor for an existing file.
TEST(FileUtilTest, OpenNoInt) {
  // Touch the file so the read-only open below has something to find; the
  // stream is closed when it goes out of scope.
  {
    std::ofstream touched("tmp_file.txt");
  }
  const int handle = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(handle, 0);
  closeNoInt(handle);
}
// closeNoInt() must report success (0) when closing a freshly opened file.
TEST(FileUtilTest, CloseNoInt) {
  // Make sure the file exists before opening it read-only.
  {
    std::ofstream touched("tmp_file.txt");
  }
  const int handle = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(handle, 0);
  const int closeStatus = closeNoInt(handle);
  ASSERT_EQ(closeStatus, 0);
}
// writeFull() must write the whole buffer and report its full length.
TEST(FileUtilTest, WriteFull) {
  // Create a temporary file
  std::ofstream tmpFile("tmp_file.txt");
  tmpFile.close();
  int fd = openNoInt("tmp_file.txt", O_WRONLY | O_CREAT, 0644);
  ASSERT_GE(fd, 0);
  const char* data = "Hello, World!";
  const size_t dataLen = strlen(data);
  const ssize_t bytesWritten = writeFull(fd, data, dataLen);
  // writeFull() returns a signed count (-1 on error). Rule out the error case
  // first, then compare as unsigned so the check does not mix signedness.
  ASSERT_GE(bytesWritten, 0);
  ASSERT_EQ(static_cast<size_t>(bytesWritten), dataLen);
  closeNoInt(fd);
}
// readFull() must return every byte of a short file even when the buffer is
// larger than the file.
TEST(FileUtilTest, ReadFull) {
  // Seed the file with a known 13-byte payload; the stream is flushed and
  // closed when it leaves scope.
  {
    std::ofstream seeded("tmp_file.txt");
    seeded << "Hello, World!";
  }
  const int handle = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(handle, 0);
  char scratch[1024];
  const ssize_t got = readFull(handle, scratch, sizeof(scratch));
  ASSERT_EQ(got, 13); // length of "Hello, World!"
  closeNoInt(handle);
}
// Constructing a File from a path must yield a valid descriptor.
TEST(FileUtilTest, FileConstructor) {
  // Make sure the file exists before opening it read-only.
  {
    std::ofstream touched("tmp_file.txt");
  }
  File opened("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(opened.fd(), 0);
  opened.close();
}
// Move construction must transfer the descriptor and leave the source empty.
TEST(FileUtilTest, FileMoveConstructor) {
  // Make sure the file exists before opening it read-only.
  {
    std::ofstream touched("tmp_file.txt");
  }
  File source("tmp_file.txt", O_RDONLY, 0);
  File target(std::move(source));
  ASSERT_GE(target.fd(), 0);
  // Inspecting the moved-from object is intentional: it must report -1.
  ASSERT_EQ(source.fd(), -1);
  target.close();
}
// Move assignment must transfer the descriptor and leave the source empty.
TEST(FileUtilTest, FileAssignmentOperator) {
  // Make sure the file exists before opening it read-only.
  {
    std::ofstream touched("tmp_file.txt");
  }
  File source("tmp_file.txt", O_RDONLY, 0);
  File target;
  target = std::move(source);
  ASSERT_GE(target.fd(), 0);
  // Inspecting the moved-from object is intentional: it must report -1.
  ASSERT_EQ(source.fd(), -1);
  target.close();
}
// File::temporary() must hand back a File wrapping a valid descriptor.
TEST(FileUtilTest, TemporaryFile) {
  auto scratch = File::temporary();
  ASSERT_GE(scratch.fd(), 0);
  scratch.close();
}
} // namespace nativert
} // namespace torch

View File

@ -0,0 +1,207 @@
#include <torch/nativert/common/FileUtil.h>
#ifdef _WIN32
#include <io.h>
#define open _open
#define read _read
#define write _write
#define fileno _fileno
#define dup _dup
#else
#include <unistd.h>
#endif
#include <cerrno>
#include <fmt/core.h>
namespace torch::nativert {
namespace {
// Thin portability shim over the platform's descriptor-closing call:
// ::close() everywhere except Windows, where ::_close() is used instead.
// Returns whatever the underlying call returns.
int unistd_close(int fh) {
#ifndef _WIN32
  return ::close(fh);
#else
  return ::_close(fh);
#endif
}
// Offset bookkeeping for wrapFull(). With no offset argument (plain
// read/write) there is nothing to advance.
inline void incr(ssize_t /*n*/) {}

// With an offset argument (pread/pwrite style) advance it by the n bytes that
// were just transferred.
template <typename Offset>
inline void incr(ssize_t n, Offset& offset) {
  const auto delta = static_cast<Offset>(n);
  offset += delta;
}
// Repeatedly invoke f(fd, buf, count, offset?) -- i.e. read/pread/write/pwrite
// -- until `count` bytes have been transferred, f reports 0 (end of file), or
// f fails with an errno other than EINTR.
//
// The optional trailing `offset` argument exists for the pread/pwrite
// flavours; the incr() overloads advance it when present and are a no-op when
// it is absent.
//
// Returns -1 on a non-EINTR failure, otherwise the total number of bytes
// transferred.
template <class F, class... Offset>
ssize_t wrapFull(F f, int fd, void* buf, size_t count, Offset... offset) {
  auto* cursor = static_cast<char*>(buf);
  ssize_t transferred = 0;
  ssize_t rc = -1;
  do {
    rc = f(fd, cursor, count, offset...);
    if (rc != -1) {
      // Progress (possibly partial): account for it and move the window.
      transferred += rc;
      cursor += rc;
      count -= rc;
      incr(rc, offset...);
    } else if (errno != EINTR) {
      return rc;
    }
    // On EINTR fall through to the loop condition and try again.
  } while (rc != 0 && count); // 0 means EOF
  return transferred;
}
// Post-process the return value of close(): an EINTR failure is reported as
// success (0); every other value is passed through unchanged.
int filterCloseReturn(int r) {
  // Why EINTR is swallowed rather than retried: on Linux, close() may only
  // return EINTR after the descriptor has already been closed. Retrying would
  // at best yield EBADF and at worst close an unrelated file that another
  // thread opened in the meantime.
  //
  // The Single Unix Specification leaves the descriptor's state unspecified
  // when close() returns EINTR. Not retrying is the safe choice there too:
  // leaking a descriptor is better than closing the wrong file.
  const bool interrupted = (r == -1) && (errno == EINTR);
  return interrupted ? 0 : r;
}
// The following wrapX() functions are private functions for wrapping file-io
// against interrupt and partial op completions.

// Call f(args...) in a loop, retrying for as long as it fails with EINTR.
// Returns the first result that is not an EINTR failure: -1 with another errno
// on error, or whatever f returned on success.
//
// The arguments are passed as lvalues on every attempt. They are held by value
// in this function, so forwarding them would move them into the first call and
// leave moved-from values for any retry.
template <class F, class... Args>
ssize_t wrapNoInt(F f, Args... args) {
  ssize_t r = -1;
  do {
    r = f(args...);
  } while (r == -1 && errno == EINTR);
  return r;
}
} // namespace
// open() wrapper that retries for as long as the call fails with EINTR.
// Returns the descriptor, or -1 on any other failure.
int openNoInt(const char* name, int flags, mode_t mode) {
  // open() is called through a lambda rather than handed to wrapNoInt()
  // directly. Android NDK bionic with FORTIFY declares an extra overload:
  // https://android.googlesource.com/platform/bionic/+/9349b9e51b/libc/include/bits/fortify/fcntl.h
  // ```
  // __BIONIC_ERROR_FUNCTION_VISIBILITY
  // int open(const char* pathname, int flags, mode_t modes, ...) __overloadable
  // __errorattr(__open_too_many_args_error);
  // ```
  // It exists to reject open() calls with incorrect parameters, but it also
  // makes template deduction fail when `open` itself is passed to wrapNoInt().
  // Wrapping the call in a lambda sidesteps that. The approach follows
  // https://github.com/llvm/llvm-project/commit/0a0e411204a2baa520fd73a8d69b664f98b428ba
  const auto attemptOpen = [&] { return open(name, flags, mode); };
  const ssize_t result = wrapNoInt(attemptOpen);
  return static_cast<int>(result);
}
// Close fd exactly once. An EINTR failure is reported as success by
// filterCloseReturn(); any other result is returned unchanged.
int closeNoInt(int fd) {
  const int rawResult = unistd_close(fd);
  return filterCloseReturn(rawResult);
}
// write() wrapper built on wrapFull(): retries on EINTR and keeps writing
// after short writes.
ssize_t writeFull(int fd, const void* buf, size_t count) {
  // wrapFull() is shared with the read path and therefore takes a mutable
  // pointer, so constness is cast away here to match its signature.
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
  void* const data = const_cast<void*>(buf);
  return wrapFull(write, fd, data, count);
}
// read() wrapper built on wrapFull(): retries on EINTR and keeps reading
// after short reads until `count` bytes arrive or read() reports end of file.
ssize_t readFull(int fd, void* buf, size_t count) {
  const ssize_t total = wrapFull(read, fd, buf, count);
  return total;
}
// Wrap an existing descriptor. `ownsFd` records whether this File is
// responsible for closing it (see closeNoThrow(), which only closes when
// ownsFd_ is set).
// NOTE(review): this constructor is noexcept; if TORCH_CHECK reports failure
// by throwing, a failed check here would terminate the process -- confirm
// that is the intended behaviour.
File::File(int fd, bool ownsFd) noexcept : fd_(fd), ownsFd_(ownsFd) {
// -1 is the only negative value accepted; it is the "no file" marker used
// throughout this class.
TORCH_CHECK(fd >= -1, "fd must be -1 or non-negative");
// Ownership is only meaningful for a real descriptor.
TORCH_CHECK(fd != -1 || !ownsFd, "cannot own -1");
}
// Open `name` with the given flags and creation mode and take ownership of
// the resulting descriptor.
//
// Throws std::runtime_error if open() fails. The message carries the path,
// the flags, the mode in octal and the errno reported by open().
File::File(std::string_view name, int flags, mode_t mode)
    : fd_(-1), ownsFd_(false) {
  // open() needs a NUL-terminated path, which a string_view does not
  // guarantee. The copy is kept alive until errno has been captured, so that
  // releasing it cannot disturb the value set by open().
  const std::string path(name);
  fd_ = ::open(path.c_str(), flags, mode);
  if (fd_ == -1) {
    const int openErrno = errno;
    // The mode is printed in octal after the literal leading "0" so it reads
    // the way it is normally written (e.g. 0644).
    throw std::runtime_error(fmt::format(
        "open(\"{}\", {}, 0{:03o}) failed with errno {}.",
        name,
        flags,
        mode,
        openErrno));
  }
  ownsFd_ = true;
}
// Move constructor: take over the descriptor and ownership flag of `other`,
// leaving `other` empty (fd -1, not owning).
File::File(File&& other) noexcept : fd_(other.fd_), ownsFd_(other.ownsFd_) {
  other.fd_ = -1;
  other.ownsFd_ = false;
}
// Move assignment: close whatever this object currently holds (errors from
// that close are ignored), then take over `other`'s descriptor and ownership
// flag, leaving `other` empty.
File& File::operator=(File&& other) noexcept {
  closeNoThrow();
  // After closeNoThrow() this object is empty (fd -1, not owning), so copying
  // the fields over and emptying `other` has the same effect as a swap.
  fd_ = other.fd_;
  ownsFd_ = other.ownsFd_;
  other.release();
  return *this;
}
// Close the descriptor if it is owned. Close failures are ignored except for
// EBADF, which is reported through TORCH_CHECK.
File::~File() {
  // Remember the descriptor for the diagnostic: closeNoThrow() resets fd_.
  const auto closingFd = fd_;
  if (closeNoThrow()) {
    return;
  }
  // ignore most errors
  TORCH_CHECK(
      errno != EBADF,
      "closing fd ",
      closingFd,
      ", it may already ",
      "have been closed. Another time, this might close the wrong FD.");
}
// Create a temporary file with tmpfile() and return an owning File for a
// duplicate of its descriptor. Throws std::runtime_error if tmpfile() or
// dup() fails.
/* static */ File File::temporary() {
  FILE* stream = tmpfile();
  if (stream == nullptr) {
    throw std::runtime_error("tmpfile() failed");
  }
  // The stdio stream is closed on every exit path; only the duplicated
  // descriptor is handed to the caller.
  auto closeStream = c10::make_scope_exit([&]() { fclose(stream); });
  const int duplicated = ::dup(fileno(stream));
  if (duplicated == -1) {
    throw std::runtime_error("dup() failed");
  }
  return File(duplicated, true);
}
// Give up the descriptor without closing it: returns the current fd (or -1 if
// none) and leaves this object empty and non-owning.
int File::release() noexcept {
  ownsFd_ = false;
  const int previous = fd_;
  fd_ = -1;
  return previous;
}
// Exchange descriptor and ownership flag with `other`.
void File::swap(File& other) noexcept {
  // Qualified calls: an unqualified swap() here would name this member.
  std::swap(fd_, other.fd_);
  std::swap(ownsFd_, other.ownsFd_);
}
// Close the file if it is owned; throws std::runtime_error when the
// underlying close reports failure.
void File::close() {
  const bool closedOk = closeNoThrow();
  if (closedOk) {
    return;
  }
  throw std::runtime_error("close() failed");
}
// Close the descriptor if this object owns it, then reset the object to the
// empty state either way. Returns true when nothing needed closing or the
// close succeeded, false otherwise.
[[nodiscard]] bool File::closeNoThrow() {
  int closeResult = 0;
  if (ownsFd_) {
    closeResult = unistd_close(fd_);
  }
  release();
  return closeResult == 0;
}
} // namespace torch::nativert

View File

@ -0,0 +1,258 @@
#pragma once
/*
* Ported from folly/FileUtil.h
*/
#include <limits>
#include <string_view>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
// Copied from folly/portability/SysTypes.h
#ifdef _WIN32
#include <basetsd.h>
// This is a massive pain to have be an `int` due to the pthread implementation
// we support, but it's far more compatible with the rest of the windows world
// as an `int` than it would be as a `void*`
using pid_t = int;
using uid_t = int;
using gid_t = int;
// This isn't actually supposed to be defined here, but it's the most
// appropriate place without defining a portability header for stdint.h
// with just this single typedef.
using ssize_t = SSIZE_T;
#ifndef HAVE_MODE_T
#define HAVE_MODE_T 1
// The Windows headers don't define this anywhere, nor do any of the libs
// that Folly depends on, so define it here.
using mode_t = unsigned int;
#endif
// Copied from folly/portability/Fcntl.h
#define O_CLOEXEC _O_NOINHERIT
#endif
#include <c10/util/Exception.h>
#include <c10/util/ScopeExit.h>
namespace torch::nativert {
/**
 * Move-only wrapper around a file descriptor that optionally owns it, i.e.
 * closes it on destruction.
 */
class File {
 public:
  /**
   * Builds a File that wraps nothing (fd() == -1); a real file can be
   * move-assigned into it later.
   */
  constexpr File() noexcept : fd_{-1}, ownsFd_{false} {}

  /**
   * Wraps a descriptor that is already open.
   *
   * @param fd Existing file descriptor
   * @param ownsFd When true, this File takes over responsibility for closing
   *               the descriptor.
   */
  explicit File(int fd, bool ownsFd = false) noexcept;

  /**
   * Opens `name` and wraps the resulting descriptor, which is always owned.
   * Throws if the file cannot be opened.
   */
  explicit File(
      std::string_view name,
      int flags = O_RDONLY,
      mode_t mode = 0666);

  ~File();

  /**
   * Returns an owning File for a freshly created temporary file (obtained via
   * tmpfile()).
   */
  static File temporary();

  /**
   * The wrapped descriptor; -1 once the file has been closed or released.
   */
  int fd() const {
    return fd_;
  }

  /**
   * True iff this object currently wraps a descriptor.
   */
  explicit operator bool() const {
    return fd_ != -1;
  }

  /**
   * Closes the file when the descriptor is owned and throws if that fails.
   * Does nothing for a descriptor that is not owned.
   */
  void close();

  /**
   * Non-throwing variant of close(): returns true on success, false (with
   * errno set) on failure.
   */
  bool closeNoThrow();

  /**
   * Hands the descriptor back to the caller without closing it; this File no
   * longer owns it afterwards. Returns -1 when no file was wrapped.
   */
  int release() noexcept;

  /**
   * Exchanges the contents of this File with those of `other`.
   */
  void swap(File& other) noexcept;

  // Movable ...
  File(File&&) noexcept;
  File& operator=(File&&) noexcept;

 private:
  // ... but not copyable: a descriptor has a single owner.
  File(const File&) = delete;
  File& operator=(const File&) = delete;

  int fd_;
  bool ownsFd_;
};
/**
* Convenience wrappers around some commonly used system calls.
*
* openNoInt() retries open() for as long as it fails with EINTR.
*
* closeNoInt() never retries: it calls close() once and reports an EINTR
* failure as success (0), because retrying close() after EINTR risks closing
* a descriptor that has since been reused by another thread.
*
* The *Full wrappers retry on EINTR and also loop until all requested data
* has been transferred (or, for reads, end of file is reached). Note that
* *Full wrappers weaken the thread semantics of underlying system calls.
*/
int openNoInt(const char* name, int flags, mode_t mode = 0666);
int closeNoInt(int fd);
/**
* Counterpart of readFull(): a wrapper around write() that, in addition to
* retrying on EINTR, loops until all data is written.
*
* Generally, the write() system call may always write fewer bytes than
* requested, just like read(). In certain cases (such as when writing to a
* pipe), POSIX provides stronger guarantees, but not in the general case.
* For example, Linux (even on a 64-bit platform) won't write more than 2GB in
* one write() system call.
*
* This comment was ported from folly, where it also covers pwriteFull,
* writevFull and pwritevFull; none of those is declared in the part of this
* header shown here.
*
* Returns -1 on error, or the total number of bytes written on success.
*/
ssize_t writeFull(int fd, const void* buf, size_t count);
/**
* Wrapper around read() that, in addition to retrying on EINTR, will loop
* until all data is read.
*
* This wrapper is only useful for blocking file descriptors (for non-blocking
* file descriptors, you have to be prepared to deal with incomplete reads
* anyway), and only exists because POSIX allows read() to return an incomplete
* read if interrupted by a signal (instead of returning -1 and setting errno
* to EINTR).
*
* Note that this wrapper weakens the thread safety of read(): the file pointer
* is shared between threads, but the system call is atomic. If multiple
* threads are reading from a file at the same time, you don't know where your
* data came from in the file, but you do know that the returned bytes were
* contiguous. You can no longer make this assumption if using readFull().
* You should probably use pread() when reading from the same file descriptor
* from multiple threads simultaneously, anyway.
*
* This comment was ported from folly, where it also covers preadFull,
* readvFull and preadvFull; none of those is declared in the part of this
* header shown here.
*
* Returns -1 on error. Otherwise returns the number of bytes read, which is
* less than count only if read() reported end of file (returned 0) first.
*/
[[nodiscard]] ssize_t readFull(int fd, void* buf, size_t count);
/**
 * Reads from fd into the container `out`: the whole file when num_bytes is
 * left at its default, otherwise at most num_bytes bytes. `out` must be a
 * contiguous container of byte-sized elements that supports size(), resize()
 * and random access (e.g. std::vector<char>, std::string).
 *
 * On return `out` is trimmed to exactly the bytes that were read, on success
 * and on failure alike.
 *
 * Returns: true on success, false on failure. On failure errno is left as
 * set by the system primitive that failed.
 */
template <class Container>
bool readFile(
    int fd,
    Container& out,
    size_t num_bytes = std::numeric_limits<size_t>::max()) {
  static_assert(
      sizeof(out[0]) == 1,
      "readFile: only containers with byte-sized elements accepted");

  size_t bytesRead = 0; // number of bytes successfully read so far
  // Whatever path leaves this function, shrink `out` to what was really read.
  auto trimToRead = c10::make_scope_exit([&]() {
    assert(out.size() >= bytesRead); // resize better doesn't throw
    out.resize(bytesRead);
  });

  // Ask the OS how large the file claims to be.
  struct stat info;
  if (fstat(fd, &info) == -1) {
    return false;
  }

  // The advertised size is only a hint: some files (notably under /proc and
  // /sys on Linux) misreport it. A reported size of zero therefore still
  // triggers a read with a default-sized buffer, and a non-zero size gets one
  // extra byte so that a file of exactly that size is recognised as exhausted
  // without growing the buffer.
  constexpr size_t initialAlloc = 1024 * 4;
  const size_t sizeHint =
      info.st_size > 0 ? (static_cast<size_t>(info.st_size) + 1) : initialAlloc;
  out.resize(std::min(sizeHint, num_bytes));

  while (bytesRead < out.size()) {
    const size_t capacity = out.size();
    const auto got = readFull(fd, &out[bytesRead], capacity - bytesRead);
    if (got == -1) {
      return false;
    }
    bytesRead += got;
    if (bytesRead < capacity) {
      // Short read: the file is exhausted.
      break;
    }
    // Buffer full: grow it geometrically (avoids quadratic behaviour), but
    // never beyond num_bytes.
    out.resize(std::min(capacity * 3 / 2, num_bytes));
  }
  return true;
}
/**
* Same as above, but takes in a file name instead of fd
*/
template <class Container>
bool readFile(
const char* file_name,
Container& out,
size_t num_bytes = std::numeric_limits<size_t>::max()) {
TORCH_CHECK(file_name);
const auto fd = openNoInt(file_name, O_RDONLY | O_CLOEXEC);
if (fd == -1) {
return false;
}
auto guard = c10::make_scope_exit([&]() {
// Ignore errors when closing the file
closeNoInt(fd);
});
return readFile(fd, out, num_bytes);
}
} // namespace torch::nativert