From f8010e7b934ab5f289a9d0f92168476882d497d4 Mon Sep 17 00:00:00 2001 From: Georgia Phillips Date: Tue, 27 May 2025 03:42:47 +0000 Subject: [PATCH] [nativert] Move file_util to pytorch core (#153162) Summary: fbcode//sigmoid/core/common -> fbcode//caffe2/torch/nativert/common Test Plan: Github CI Differential Revision: D74328089 Pull Request resolved: https://github.com/pytorch/pytorch/pull/153162 Approved by: https://github.com/zhxchen17 --- build_variables.bzl | 1 + test/cpp/nativert/CMakeLists.txt | 1 + test/cpp/nativert/test_file_util.cpp | 111 ++++++++++++ torch/nativert/common/FileUtil.cpp | 207 +++++++++++++++++++++ torch/nativert/common/FileUtil.h | 258 +++++++++++++++++++++++++++ 5 files changed, 578 insertions(+) create mode 100644 test/cpp/nativert/test_file_util.cpp create mode 100644 torch/nativert/common/FileUtil.cpp create mode 100644 torch/nativert/common/FileUtil.h diff --git a/build_variables.bzl b/build_variables.bzl index 85cfb3b0f646..2b03902dda61 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -594,6 +594,7 @@ libtorch_nativert_sources = [ "torch/nativert/graph/TensorMeta.cpp", "torch/nativert/executor/Placement.cpp", "torch/nativert/executor/PlacementUtils.cpp", + "torch/nativert/common/FileUtil.cpp", ] torch_mobile_tracer_sources = [ diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 8ef451b7139d..620636330a4f 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -8,6 +8,7 @@ set(NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp ${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp ${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp + ${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp ) add_executable(test_nativert diff --git a/test/cpp/nativert/test_file_util.cpp b/test/cpp/nativert/test_file_util.cpp new file mode 100644 index 000000000000..05d0b8203ad3 --- /dev/null +++ b/test/cpp/nativert/test_file_util.cpp @@ -0,0 +1,111 @@ 
+#include +#include +#include + +namespace torch { +namespace nativert { + +TEST(FileUtilTest, OpenNoInt) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile.close(); + + int fd = openNoInt("tmp_file.txt", O_RDONLY, 0); + ASSERT_GE(fd, 0); + + closeNoInt(fd); +} + +TEST(FileUtilTest, CloseNoInt) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile.close(); + + int fd = openNoInt("tmp_file.txt", O_RDONLY, 0); + ASSERT_GE(fd, 0); + + int result = closeNoInt(fd); + ASSERT_EQ(result, 0); +} + +TEST(FileUtilTest, WriteFull) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile.close(); + + int fd = openNoInt("tmp_file.txt", O_WRONLY | O_CREAT, 0644); + ASSERT_GE(fd, 0); + + const char* data = "Hello, World!"; + ssize_t bytesWritten = writeFull(fd, data, strlen(data)); + ASSERT_EQ(bytesWritten, strlen(data)); + + closeNoInt(fd); +} + +TEST(FileUtilTest, ReadFull) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile << "Hello, World!"; + tmpFile.close(); + + int fd = openNoInt("tmp_file.txt", O_RDONLY, 0); + ASSERT_GE(fd, 0); + + char buffer[1024]; + ssize_t bytesRead = readFull(fd, buffer, 1024); + ASSERT_EQ(bytesRead, 13); // length of "Hello, World!" 
+ + closeNoInt(fd); +} + +TEST(FileUtilTest, FileConstructor) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile.close(); + + File file("tmp_file.txt", O_RDONLY, 0); + ASSERT_GE(file.fd(), 0); + + file.close(); +} + +TEST(FileUtilTest, FileMoveConstructor) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile.close(); + + File file1("tmp_file.txt", O_RDONLY, 0); + File file2(std::move(file1)); + + ASSERT_GE(file2.fd(), 0); + ASSERT_EQ(file1.fd(), -1); + + file2.close(); +} + +TEST(FileUtilTest, FileAssignmentOperator) { + // Create a temporary file + std::ofstream tmpFile("tmp_file.txt"); + tmpFile.close(); + + File file1("tmp_file.txt", O_RDONLY, 0); + File file2; + + file2 = std::move(file1); + + ASSERT_GE(file2.fd(), 0); + ASSERT_EQ(file1.fd(), -1); + + file2.close(); +} + +TEST(FileUtilTest, TemporaryFile) { + File file = File::temporary(); + ASSERT_GE(file.fd(), 0); + + file.close(); +} + +} // namespace nativert +} // namespace torch diff --git a/torch/nativert/common/FileUtil.cpp b/torch/nativert/common/FileUtil.cpp new file mode 100644 index 000000000000..10c03638740f --- /dev/null +++ b/torch/nativert/common/FileUtil.cpp @@ -0,0 +1,207 @@ +#include + +#ifdef _WIN32 +#include +#define open _open +#define read _read +#define write _write +#define fileno _fileno +#define dup _dup +#else +#include +#endif +#include + +#include + +namespace torch::nativert { + +namespace { + +int unistd_close(int fh) { +#ifdef _WIN32 + return ::_close(fh); +#else + return ::close(fh); +#endif +} + +inline void incr(ssize_t) {} +template +inline void incr(ssize_t n, Offset& offset) { + offset += static_cast(n); +} + +// Wrap call to read/pread/write/pwrite(fd, buf, count, offset?) to retry on +// incomplete reads / writes. 
The variadic argument magic is there to support +// an additional argument (offset) for pread / pwrite; see the incr() functions +// above which do nothing if the offset is not present and increment it if it +// is. +template +ssize_t wrapFull(F f, int fd, void* buf, size_t count, Offset... offset) { + char* b = static_cast(buf); + ssize_t totalBytes = 0; + ssize_t r = -1; + do { + r = f(fd, b, count, offset...); + if (r == -1) { + if (errno == EINTR) { + continue; + } + return r; + } + + totalBytes += r; + b += r; + count -= r; + incr(r, offset...); + } while (r != 0 && count); // 0 means EOF + + return totalBytes; +} + +int filterCloseReturn(int r) { + // Ignore EINTR. On Linux, close() may only return EINTR after the file + // descriptor has been closed, so you must not retry close() on EINTR -- + // in the best case, you'll get EBADF, and in the worst case, you'll end up + // closing a different file (one opened from another thread). + // + // Interestingly enough, the Single Unix Specification says that the state + // of the file descriptor is unspecified if close returns EINTR. In that + // case, the safe thing to do is also not to retry close() -- leaking a file + // descriptor is definitely better than closing the wrong file. + if (r == -1 && errno == EINTR) { + return 0; + } + return r; +} + +// The following wrapX() functions are private functions for wrapping file-io +// against interrupt and partial op completions. + +// Wrap call to f(args) in loop to retry on EINTR +template +ssize_t wrapNoInt(F f, Args... 
args) { + ssize_t r = -1; + do { + r = f(std::forward(args)...); + } while (r == -1 && errno == EINTR); + return r; +} + +} // namespace + +int openNoInt(const char* name, int flags, mode_t mode) { + // Android NDK bionic with FORTIFY has this definition: + // https://android.googlesource.com/platform/bionic/+/9349b9e51b/libc/include/bits/fortify/fcntl.h + // ``` + // __BIONIC_ERROR_FUNCTION_VISIBILITY + // int open(const char* pathname, int flags, mode_t modes, ...) __overloadable + // __errorattr(__open_too_many_args_error); + // ``` + // This is originally to prevent open() with incorrect parameters. + // + // However, combined with folly wrapNoInt, template deduction will fail. + // In this case, we create a custom lambda to bypass the error. + // The solution is referenced from + // https://github.com/llvm/llvm-project/commit/0a0e411204a2baa520fd73a8d69b664f98b428ba + // + auto openWrapper = [&] { return open(name, flags, mode); }; + return int(wrapNoInt(openWrapper)); +} + +int closeNoInt(int fd) { + return filterCloseReturn(unistd_close(fd)); +} + +ssize_t writeFull(int fd, const void* buf, size_t count) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + return wrapFull(write, fd, const_cast(buf), count); +} + +ssize_t readFull(int fd, void* buf, size_t count) { + return wrapFull(read, fd, buf, count); +} + +File::File(int fd, bool ownsFd) noexcept : fd_(fd), ownsFd_(ownsFd) { + TORCH_CHECK(fd >= -1, "fd must be -1 or non-negative"); + TORCH_CHECK(fd != -1 || !ownsFd, "cannot own -1"); +} + +File::File(std::string_view name, int flags, mode_t mode) + : fd_(::open(std::string(name).c_str(), flags, mode)), ownsFd_(false) { + if (fd_ == -1) { + throw std::runtime_error(fmt::format( + "open(\"{}\", {}, 0{:03o}) failed with errno {}.", + name, + flags, + mode, + errno)); + } + ownsFd_ = true; +} + +File::File(File&& other) noexcept : fd_(other.fd_), ownsFd_(other.ownsFd_) { + other.release(); +} + +File& File::operator=(File&& other) noexcept { + 
closeNoThrow(); + swap(other); + return *this; +} + +File::~File() { + auto fd = fd_; + if (!closeNoThrow()) { // ignore most errors + TORCH_CHECK( + errno != EBADF, + "closing fd ", + fd, + ", it may already ", + "have been closed. Another time, this might close the wrong FD."); + } +} + +/* static */ File File::temporary() { + // make a temp file with tmpfile(), dup the fd, then return it in a File. + FILE* tmpFile = tmpfile(); + if (!tmpFile) { + throw std::runtime_error("tmpfile() failed"); + } + auto guard = c10::make_scope_exit([&]() { fclose(tmpFile); }); + + int fd = ::dup(fileno(tmpFile)); + if (fd == -1) { + throw std::runtime_error("dup() failed"); + } + + return File(fd, true); +} + +int File::release() noexcept { + int released = fd_; + fd_ = -1; + ownsFd_ = false; + return released; +} + +void File::swap(File& other) noexcept { + using std::swap; + swap(fd_, other.fd_); + swap(ownsFd_, other.ownsFd_); +} + +void File::close() { + if (!closeNoThrow()) { + throw std::runtime_error("close() failed"); + } +} + +[[nodiscard]] bool File::closeNoThrow() { + int r = ownsFd_ ? 
unistd_close(fd_) : 0; + release(); + return r == 0; +} + +} // namespace torch::nativert diff --git a/torch/nativert/common/FileUtil.h b/torch/nativert/common/FileUtil.h new file mode 100644 index 000000000000..28fc7c11bc35 --- /dev/null +++ b/torch/nativert/common/FileUtil.h @@ -0,0 +1,258 @@ +#pragma once + +/* + * Ported from folly/FileUtil.h + */ +#include +#include + +#include +#include +#include + +// Copied from folly/portability/SysTypes.h +#ifdef _WIN32 +#include + +// This is a massive pain to have be an `int` due to the pthread implementation +// we support, but it's far more compatible with the rest of the windows world +// as an `int` than it would be as a `void*` +using pid_t = int; + +using uid_t = int; +using gid_t = int; + +// This isn't actually supposed to be defined here, but it's the most +// appropriate place without defining a portability header for stdint.h +// with just this single typedef. +using ssize_t = SSIZE_T; + +#ifndef HAVE_MODE_T +#define HAVE_MODE_T 1 +// The Windows headers don't define this anywhere, nor do any of the libs +// that Folly depends on, so define it here. +using mode_t = unsigned int; +#endif + +// Copied from folly/portability/Fcntl.h +#define O_CLOEXEC _O_NOINHERIT +#endif + +#include +#include + +namespace torch::nativert { +class File { + public: + /** + * Creates an empty File object, for late initialization. + */ + constexpr File() noexcept : fd_(-1), ownsFd_(false) {} + + /** + * Create a File object from an existing file descriptor. + * + * @param fd Existing file descriptor + * @param ownsFd Takes ownership of the file descriptor if ownsFd is true. + */ + explicit File(int fd, bool ownsFd = false) noexcept; + + /** + * Open and create a file object. Throws on error. + * Owns the file descriptor implicitly. + */ + explicit File( + std::string_view name, + int flags = O_RDONLY, + mode_t mode = 0666); + + ~File(); + + /** + * Create and return a temporary, owned file (uses tmpfile()). 
+ */ + static File temporary(); + + /** + * Return the file descriptor, or -1 if the file was closed. + */ + int fd() const { + return fd_; + } + + /** + * Returns 'true' iff the file was successfully opened. + */ + explicit operator bool() const { + return fd_ != -1; + } + + /** + * If we own the file descriptor, close the file and throw on error. + * Otherwise, do nothing. + */ + void close(); + + /** + * Closes the file (if owned). Returns true on success, false (and sets + * errno) on error. + */ + bool closeNoThrow(); + + /** + * Returns and releases the file descriptor; no longer owned by this File. + * Returns -1 if the File object didn't wrap a file. + */ + int release() noexcept; + + /** + * Swap this File with another. + */ + void swap(File& other) noexcept; + + // movable + File(File&&) noexcept; + File& operator=(File&&) noexcept; + + private: + // unique + File(const File&) = delete; + File& operator=(const File&) = delete; + + int fd_; + bool ownsFd_; +}; + +/** + * Convenience wrappers around some commonly used system calls. The *NoInt + * wrappers retry on EINTR. The *Full wrappers retry on EINTR and also loop + * until all data is written. Note that *Full wrappers weaken the thread + * semantics of underlying system calls. + */ +int openNoInt(const char* name, int flags, mode_t mode = 0666); +int closeNoInt(int fd); + +/** + * Similar to readFull and preadFull above, wrappers around write() and + * pwrite() that loop until all data is written. + * + * Generally, the write() / pwrite() system call may always write fewer bytes + * than requested, just like read(). In certain cases (such as when writing to + * a pipe), POSIX provides stronger guarantees, but not in the general case. + * For example, Linux (even on a 64-bit platform) won't write more than 2GB in + * one write() system call. + * + * Note that writevFull and pwritevFull require iov to be non-const, unlike + * writev and pwritev. 
The contents of iov after these functions return + * is unspecified. + * + * These functions return -1 on error, or the total number of bytes written + * (which is always the same as the number of requested bytes) on success. + */ +ssize_t writeFull(int fd, const void* buf, size_t count); + +/** + * Wrapper around read() (and pread()) that, in addition to retrying on + * EINTR, will loop until all data is read. + * + * This wrapper is only useful for blocking file descriptors (for non-blocking + * file descriptors, you have to be prepared to deal with incomplete reads + * anyway), and only exists because POSIX allows read() to return an incomplete + * read if interrupted by a signal (instead of returning -1 and setting errno + * to EINTR). + * + * Note that this wrapper weakens the thread safety of read(): the file pointer + * is shared between threads, but the system call is atomic. If multiple + * threads are reading from a file at the same time, you don't know where your + * data came from in the file, but you do know that the returned bytes were + * contiguous. You can no longer make this assumption if using readFull(). + * You should probably use pread() when reading from the same file descriptor + * from multiple threads simultaneously, anyway. + * + * Note that readvFull and preadvFull require iov to be non-const, unlike + * readv and preadv. The contents of iov after these functions return + * is unspecified. + */ +[[nodiscard]] ssize_t readFull(int fd, void* buf, size_t count); + +/** + * Read entire file (if num_bytes is defaulted) or no more than + * num_bytes (otherwise) into container *out. The container is assumed + * to be contiguous, with element size equal to 1, and offer size(), + * reserve(), and random access (e.g. std::vector, std::string, + * fbstring). + * + * Returns: true on success or false on failure. In the latter case + * errno will be set appropriately by the failing system primitive. 
+ */ +template +bool readFile( + int fd, + Container& out, + size_t num_bytes = std::numeric_limits::max()) { + static_assert( + sizeof(out[0]) == 1, + "readFile: only containers with byte-sized elements accepted"); + + size_t soFar = 0; // number of bytes successfully read + auto guard = c10::make_scope_exit([&]() { + assert(out.size() >= soFar); // resize had better not throw + out.resize(soFar); + }); + + // Obtain file size: + struct stat buf; + if (fstat(fd, &buf) == -1) { + return false; + } + // Some files (notably under /proc and /sys on Linux) lie about + // their size, so treat the size advertised by fstat under advisement + // but don't rely on it. In particular, if the size is zero, we + // should attempt to read stuff. If not zero, we'll attempt to read + // one extra byte. + constexpr size_t initialAlloc = 1024 * 4; + out.resize(std::min( + buf.st_size > 0 ? (size_t(buf.st_size) + 1) : initialAlloc, num_bytes)); + + while (soFar < out.size()) { + const auto actual = readFull(fd, &out[soFar], out.size() - soFar); + if (actual == -1) { + return false; + } + soFar += actual; + if (soFar < out.size()) { + // File exhausted + break; + } + // Ew, allocate more memory. Use exponential growth to avoid + // quadratic behavior. Cap size to num_bytes. + out.resize(std::min(out.size() * 3 / 2, num_bytes)); + } + + return true; +} + +/** + * Same as above, but takes in a file name instead of fd + */ +template +bool readFile( + const char* file_name, + Container& out, + size_t num_bytes = std::numeric_limits::max()) { + TORCH_CHECK(file_name); + + const auto fd = openNoInt(file_name, O_RDONLY | O_CLOEXEC); + if (fd == -1) { + return false; + } + + auto guard = c10::make_scope_exit([&]() { + // Ignore errors when closing the file + closeNoInt(fd); + }); + + return readFile(fd, out, num_bytes); +} + +} // namespace torch::nativert