mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[nativert] Move file_util to pytorch core (#153162)
Summary: fbcode//sigmoid/core/common -> fbcode//caffe2/torch/nativert/common Test Plan: Github CI Differential Revision: D74328089 Pull Request resolved: https://github.com/pytorch/pytorch/pull/153162 Approved by: https://github.com/zhxchen17
This commit is contained in:
committed by
PyTorch MergeBot
parent
70d12ccc3f
commit
f8010e7b93
@ -594,6 +594,7 @@ libtorch_nativert_sources = [
|
||||
"torch/nativert/graph/TensorMeta.cpp",
|
||||
"torch/nativert/executor/Placement.cpp",
|
||||
"torch/nativert/executor/PlacementUtils.cpp",
|
||||
"torch/nativert/common/FileUtil.cpp",
|
||||
]
|
||||
|
||||
torch_mobile_tracer_sources = [
|
||||
|
@ -8,6 +8,7 @@ set(NATIVERT_TEST_SRCS
|
||||
${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
|
||||
${TORCH_ROOT}/torch/nativert/graph/GraphSignature.cpp
|
||||
${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
|
||||
${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
|
||||
)
|
||||
|
||||
add_executable(test_nativert
|
||||
|
111
test/cpp/nativert/test_file_util.cpp
Normal file
111
test/cpp/nativert/test_file_util.cpp
Normal file
@ -0,0 +1,111 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <torch/nativert/common/FileUtil.h>
|
||||
#include <fstream>
|
||||
|
||||
namespace torch {
|
||||
namespace nativert {
|
||||
|
||||
// openNoInt() should hand back a usable (non-negative) descriptor when asked
// to open a file that exists.
TEST(FileUtilTest, OpenNoInt) {
  // Put an empty file on disk; the stream is closed when the scope ends.
  {
    std::ofstream scratch("tmp_file.txt");
  }

  const int descriptor = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(descriptor, 0);

  closeNoInt(descriptor);
}
|
||||
|
||||
// closeNoInt() should report success (0) for a descriptor that openNoInt()
// has just produced.
TEST(FileUtilTest, CloseNoInt) {
  // Put an empty file on disk; the stream is closed when the scope ends.
  {
    std::ofstream scratch("tmp_file.txt");
  }

  const int descriptor = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(descriptor, 0);

  ASSERT_EQ(closeNoInt(descriptor), 0);
}
|
||||
|
||||
// writeFull() must write the whole payload and the bytes must actually land
// in the file: the count is checked first, then the file is read back and
// compared against what was written.
TEST(FileUtilTest, WriteFull) {
  // Create (or truncate) the scratch file.
  std::ofstream tmpFile("tmp_file.txt");
  tmpFile.close();

  int fd = openNoInt("tmp_file.txt", O_WRONLY | O_CREAT, 0644);
  ASSERT_GE(fd, 0);

  const std::string_view payload = "Hello, World!";
  const ssize_t bytesWritten = writeFull(fd, payload.data(), payload.size());
  // Compare in the signed domain so a -1 error return is not silently
  // converted to a huge unsigned value.
  ASSERT_EQ(bytesWritten, static_cast<ssize_t>(payload.size()));
  ASSERT_EQ(closeNoInt(fd), 0);

  // Read the file back to confirm the content, not just the byte count.
  fd = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(fd, 0);

  char buffer[64];
  const ssize_t bytesRead = readFull(fd, buffer, sizeof(buffer));
  // Close before asserting so a failed assertion does not leak the fd.
  closeNoInt(fd);

  ASSERT_EQ(bytesRead, static_cast<ssize_t>(payload.size()));
  EXPECT_EQ(
      std::string_view(buffer, static_cast<size_t>(bytesRead)), payload);
}
|
||||
|
||||
// readFull() must return every byte of a short file in a single call, and
// the bytes returned must match what was stored.
TEST(FileUtilTest, ReadFull) {
  const std::string_view payload = "Hello, World!";

  // Create the scratch file with known content.
  std::ofstream tmpFile("tmp_file.txt");
  tmpFile << payload;
  tmpFile.close();

  const int fd = openNoInt("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(fd, 0);

  char buffer[1024];
  const ssize_t bytesRead = readFull(fd, buffer, sizeof(buffer));
  // Close before asserting so a failed assertion does not leak the fd.
  closeNoInt(fd);

  ASSERT_EQ(bytesRead, static_cast<ssize_t>(payload.size()));
  EXPECT_EQ(
      std::string_view(buffer, static_cast<size_t>(bytesRead)), payload);
}
|
||||
|
||||
// Constructing a File from a path should leave it holding a valid descriptor.
TEST(FileUtilTest, FileConstructor) {
  // Put an empty file on disk; the stream is closed when the scope ends.
  {
    std::ofstream scratch("tmp_file.txt");
  }

  File opened("tmp_file.txt", O_RDONLY, 0);
  ASSERT_GE(opened.fd(), 0);

  opened.close();
}
|
||||
|
||||
// Move-constructing a File transfers the descriptor to the new object and
// leaves the source reporting -1.
TEST(FileUtilTest, FileMoveConstructor) {
  // Put an empty file on disk; the stream is closed when the scope ends.
  {
    std::ofstream scratch("tmp_file.txt");
  }

  File source("tmp_file.txt", O_RDONLY, 0);
  File target(std::move(source));

  ASSERT_GE(target.fd(), 0);
  // Inspecting the moved-from object is deliberate: its state is the contract
  // under test.
  ASSERT_EQ(source.fd(), -1);

  target.close();
}
|
||||
|
||||
// Move-assigning into an empty File transfers the descriptor and leaves the
// source reporting -1.
TEST(FileUtilTest, FileAssignmentOperator) {
  // Put an empty file on disk; the stream is closed when the scope ends.
  {
    std::ofstream scratch("tmp_file.txt");
  }

  File source("tmp_file.txt", O_RDONLY, 0);
  File target;

  target = std::move(source);

  ASSERT_GE(target.fd(), 0);
  // Inspecting the moved-from object is deliberate: its state is the contract
  // under test.
  ASSERT_EQ(source.fd(), -1);

  target.close();
}
|
||||
|
||||
// File::temporary() should yield a File that holds a valid descriptor.
TEST(FileUtilTest, TemporaryFile) {
  File scratch(File::temporary());
  ASSERT_GE(scratch.fd(), 0);

  scratch.close();
}
|
||||
|
||||
} // namespace nativert
|
||||
} // namespace torch
|
207
torch/nativert/common/FileUtil.cpp
Normal file
207
torch/nativert/common/FileUtil.cpp
Normal file
@ -0,0 +1,207 @@
|
||||
#include <torch/nativert/common/FileUtil.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#define open _open
|
||||
#define read _read
|
||||
#define write _write
|
||||
#define fileno _fileno
|
||||
#define dup _dup
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <cerrno>
|
||||
|
||||
#include <fmt/core.h>
|
||||
|
||||
namespace torch::nativert {
|
||||
|
||||
namespace {
|
||||
|
||||
// Thin portability shim over the platform's descriptor-close call: ::close on
// non-Windows builds, ::_close when _WIN32 is defined. The result of the
// underlying call is returned untouched.
int unistd_close(int fh) {
#ifndef _WIN32
  return ::close(fh);
#else
  return ::_close(fh);
#endif
}
|
||||
|
||||
// Offset-less overload: nothing to advance, so this is a no-op.
inline void incr(ssize_t /*n*/) {}

// Offset-carrying overload: moves `offset` forward by `n`, converting `n` to
// the offset's own type first.
template <typename Offset>
inline void incr(ssize_t n, Offset& offset) {
  const auto delta = static_cast<Offset>(n);
  offset += delta;
}
|
||||
|
||||
// Drives a read/pread/write/pwrite-shaped callable f(fd, buf, count, offset?)
// until `count` bytes have been transferred, f reports 0 (EOF), or f fails.
//
// - A return of -1 with errno == EINTR is retried.
// - Any other -1 is returned to the caller as-is.
// - Otherwise the buffer pointer, remaining count and (if supplied) the offset
//   are advanced by the number of bytes f reported.
//
// The parameter pack exists solely to carry the optional offset argument of
// pread / pwrite; incr() is a no-op when the pack is empty and advances the
// offset when it is not.
//
// Returns the total number of bytes transferred, or -1 on error.
template <class F, class... Offset>
ssize_t wrapFull(F f, int fd, void* buf, size_t count, Offset... offset) {
  auto* cursor = static_cast<char*>(buf);
  ssize_t transferred = 0;
  ssize_t rc = -1;
  do {
    rc = f(fd, cursor, count, offset...);
    if (rc != -1) {
      transferred += rc;
      cursor += rc;
      count -= rc;
      incr(rc, offset...);
    } else if (errno != EINTR) {
      return rc;
    }
    // rc == -1 with EINTR falls through to the loop condition and retries.
  } while (rc != 0 && count); // 0 means EOF

  return transferred;
}
|
||||
|
||||
// Post-processes the return value of close(): an EINTR failure is reported as
// success (0); every other value is passed through unchanged.
//
// Rationale: on Linux, close() may only return EINTR after the descriptor has
// already been closed, so retrying is wrong -- at best the retry yields EBADF,
// at worst it closes an unrelated file that another thread opened in the
// meantime. The Single Unix Specification leaves the descriptor state
// unspecified after EINTR; not retrying is still the safe choice there, since
// leaking a descriptor is preferable to closing the wrong file.
int filterCloseReturn(int r) {
  const bool interrupted = (r == -1) && (errno == EINTR);
  return interrupted ? 0 : r;
}
|
||||
|
||||
// The wrapX() functions in this namespace are private helpers that guard file
// I/O against signal interruption and partially completed operations.
|
||||
|
||||
// Calls f(args...) in a loop, retrying for as long as it fails with
// errno == EINTR.
//
// @param f     Callable returning -1 (with errno set) on failure.
// @param args  Arguments passed to every invocation of f.
// @return      The first result of f that is not an EINTR failure.
//
// The arguments are deliberately passed as lvalues rather than forwarded:
// `args` are by-value parameters, so forwarding them would move from them on
// the first attempt and hand moved-from values to f on every retry.
template <class F, class... Args>
ssize_t wrapNoInt(F f, Args... args) {
  ssize_t r = -1;
  do {
    r = f(args...);
  } while (r == -1 && errno == EINTR);
  return r;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int openNoInt(const char* name, int flags, mode_t mode) {
|
||||
// Android NDK bionic with FORTIFY has this definition:
|
||||
// https://android.googlesource.com/platform/bionic/+/9349b9e51b/libc/include/bits/fortify/fcntl.h
|
||||
// ```
|
||||
// __BIONIC_ERROR_FUNCTION_VISIBILITY
|
||||
// int open(const char* pathname, int flags, mode_t modes, ...) __overloadable
|
||||
// __errorattr(__open_too_many_args_error);
|
||||
// ```
|
||||
// This is originally to prevent open() with incorrect parameters.
|
||||
//
|
||||
// However, combined with folly wrapNotInt, template deduction will fail.
|
||||
// In this case, we create a custom lambda to bypass the error.
|
||||
// The solution is referenced from
|
||||
// https://github.com/llvm/llvm-project/commit/0a0e411204a2baa520fd73a8d69b664f98b428ba
|
||||
//
|
||||
auto openWrapper = [&] { return open(name, flags, mode); };
|
||||
return int(wrapNoInt(openWrapper));
|
||||
}
|
||||
|
||||
int closeNoInt(int fd) {
|
||||
return filterCloseReturn(unistd_close(fd));
|
||||
}
|
||||
|
||||
ssize_t writeFull(int fd, const void* buf, size_t count) {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
|
||||
return wrapFull(write, fd, const_cast<void*>(buf), count);
|
||||
}
|
||||
|
||||
ssize_t readFull(int fd, void* buf, size_t count) {
|
||||
return wrapFull(read, fd, buf, count);
|
||||
}
|
||||
|
||||
// Wraps an already-open descriptor. `fd` must be -1 (empty File) or
// non-negative, and ownership may only be claimed for a real descriptor.
//
// NOTE(review): this constructor is noexcept yet validates with TORCH_CHECK;
// if TORCH_CHECK reports failure by throwing, a violated precondition would
// terminate the process -- confirm that this is the intended behaviour.
File::File(int fd, bool ownsFd) noexcept : fd_(fd), ownsFd_(ownsFd) {
  // The two conditions cannot fail simultaneously (the second only fails for
  // fd == -1, which satisfies the first), so their order is immaterial.
  TORCH_CHECK(fd != -1 || !ownsFd, "cannot own -1");
  TORCH_CHECK(fd >= -1, "fd must be -1 or non-negative");
}
|
||||
|
||||
// Opens `name` with the given open() flags and creation mode and takes
// ownership of the resulting descriptor.
//
// @throws std::runtime_error if open() fails; the message carries the path,
//         the flags, the mode (in octal) and the errno value.
File::File(std::string_view name, int flags, mode_t mode)
    : fd_(::open(std::string(name).c_str(), flags, mode)), ownsFd_(false) {
  if (fd_ == -1) {
    // Capture errno before building the message so that nothing done while
    // formatting can overwrite it.
    const int openErrno = errno;
    // `{:#o}` prints the mode in octal with a leading 0 (e.g. 0666). The
    // previous "0{}" printed the decimal value behind a literal 0, which read
    // like octal but was not (0666 came out as "0438").
    throw std::runtime_error(fmt::format(
        "open(\"{}\", {}, {:#o}) failed with errno {}.",
        name,
        flags,
        mode,
        openErrno));
  }
  // Only claim ownership once the descriptor is known to be valid.
  ownsFd_ = true;
}
|
||||
|
||||
// Move constructor: start out empty, then trade state with `other`, which
// leaves `other` holding fd -1 and no ownership.
File::File(File&& other) noexcept : fd_(-1), ownsFd_(false) {
  swap(other);
}
|
||||
|
||||
// Move assignment: closes whatever this object currently owns (any close
// error is discarded), then takes over `other`'s descriptor and ownership
// flag. `other` ends up empty.
File& File::operator=(File&& other) noexcept {
  static_cast<void>(closeNoThrow());
  swap(other);
  return *this;
}
|
||||
|
||||
// Closes the descriptor if it is owned. Close failures are ignored except for
// EBADF, which indicates the descriptor was already closed elsewhere.
//
// NOTE(review): TORCH_CHECK is used inside a destructor; if it reports
// failure by throwing, the EBADF case would terminate the process -- confirm
// that this is intended.
File::~File() {
  // Remember the descriptor for the diagnostic; closeNoThrow() resets fd_.
  const auto fd = fd_;
  if (closeNoThrow()) {
    return;
  }
  // ignore most errors
  TORCH_CHECK(
      errno != EBADF,
      "closing fd ",
      fd,
      ", it may already ",
      "have been closed. Another time, this might close the wrong FD.");
}
|
||||
|
||||
// Creates a temporary file with tmpfile(), duplicates its descriptor, and
// returns the duplicate wrapped in an owning File. The FILE* stream itself is
// closed on every exit path.
//
// @throws std::runtime_error if tmpfile() or dup() fails.
/* static */ File File::temporary() {
  FILE* stream = tmpfile();
  if (stream == nullptr) {
    throw std::runtime_error("tmpfile() failed");
  }
  // From here on the stream is closed automatically, whether we return or
  // throw.
  auto closeStream = c10::make_scope_exit([&]() { fclose(stream); });

  const int duplicate = ::dup(fileno(stream));
  if (duplicate == -1) {
    throw std::runtime_error("dup() failed");
  }

  return File(duplicate, true);
}
|
||||
|
||||
// Gives up the descriptor without closing it: the object becomes empty
// (fd -1, not owning) and the previously held descriptor is returned.
int File::release() noexcept {
  const int previous = fd_;
  ownsFd_ = false;
  fd_ = -1;
  return previous;
}
|
||||
|
||||
// Exchanges the descriptor and the ownership flag with `other`.
void File::swap(File& other) noexcept {
  std::swap(ownsFd_, other.ownsFd_);
  std::swap(fd_, other.fd_);
}
|
||||
|
||||
void File::close() {
|
||||
if (!closeNoThrow()) {
|
||||
throw std::runtime_error("close() failed");
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] bool File::closeNoThrow() {
|
||||
int r = ownsFd_ ? unistd_close(fd_) : 0;
|
||||
release();
|
||||
return r == 0;
|
||||
}
|
||||
|
||||
} // namespace torch::nativert
|
258
torch/nativert/common/FileUtil.h
Normal file
258
torch/nativert/common/FileUtil.h
Normal file
@ -0,0 +1,258 @@
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Ported from folly/FileUtil.h
|
||||
*/
|
||||
#include <algorithm>
#include <cassert>
#include <limits>
#include <string_view>

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
|
||||
|
||||
// Copied from folly/portability/SysTypes.h
|
||||
#ifdef _WIN32
|
||||
#include <basetsd.h>
|
||||
|
||||
// This is a massive pain to have be an `int` due to the pthread implementation
|
||||
// we support, but it's far more compatible with the rest of the windows world
|
||||
// as an `int` than it would be as a `void*`
|
||||
using pid_t = int;
|
||||
|
||||
using uid_t = int;
|
||||
using gid_t = int;
|
||||
|
||||
// This isn't actually supposed to be defined here, but it's the most
|
||||
// appropriate place without defining a portability header for stdint.h
|
||||
// with just this single typedef.
|
||||
using ssize_t = SSIZE_T;
|
||||
|
||||
#ifndef HAVE_MODE_T
|
||||
#define HAVE_MODE_T 1
|
||||
// The Windows headers don't define this anywhere, nor do any of the libs
|
||||
// that Folly depends on, so define it here.
|
||||
using mode_t = unsigned int;
|
||||
#endif
|
||||
|
||||
// Copied from folly/portability/Fcntl.h
|
||||
#define O_CLOEXEC _O_NOINHERIT
|
||||
#endif
|
||||
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/ScopeExit.h>
|
||||
|
||||
namespace torch::nativert {
|
||||
/**
 * Move-only wrapper around a file descriptor that may or may not own it.
 * An "empty" File holds descriptor -1 and owns nothing.
 */
class File {
 public:
  /**
   * Builds an empty File (fd -1, not owning) to be filled in later.
   */
  constexpr File() noexcept : fd_(-1), ownsFd_(false) {}

  /**
   * Wraps a descriptor that is already open.
   *
   * @param fd     The descriptor to wrap.
   * @param ownsFd When true, this File takes ownership of the descriptor.
   */
  explicit File(int fd, bool ownsFd = false) noexcept;

  /**
   * Opens `name` and wraps the resulting descriptor, which this File then
   * owns. Throws if the file cannot be opened.
   */
  explicit File(
      std::string_view name,
      int flags = O_RDONLY,
      mode_t mode = 0666);

  // Movable.
  File(File&&) noexcept;
  File& operator=(File&&) noexcept;

  ~File();

  /**
   * Returns an owning File for a fresh temporary file (made with tmpfile()).
   */
  static File temporary();

  /**
   * The wrapped descriptor; -1 once the file has been closed.
   */
  int fd() const {
    return fd_;
  }

  /**
   * True exactly when the wrapped descriptor is not -1.
   */
  explicit operator bool() const {
    return fd_ != -1;
  }

  /**
   * Closes the file if the descriptor is owned, throwing on failure.
   * Does nothing when the descriptor is not owned.
   */
  void close();

  /**
   * Non-throwing close: closes the file if owned. Yields true on success and
   * false (with errno set) on failure.
   */
  bool closeNoThrow();

  /**
   * Hands the descriptor back to the caller and stops owning it. Yields -1
   * when this File was not wrapping a file.
   */
  int release() noexcept;

  /**
   * Exchanges the contents of this File with `other`.
   */
  void swap(File& other) noexcept;

 private:
  // Not copyable: each File is unique.
  File(const File&) = delete;
  File& operator=(const File&) = delete;

  int fd_;
  bool ownsFd_;
};
|
||||
|
||||
/**
|
||||
* Convenience wrappers around some commonly used system calls. The *NoInt
|
||||
* wrappers retry on EINTR. The *Full wrappers retry on EINTR and also loop
|
||||
* until all data is written. Note that *Full wrappers weaken the thread
|
||||
* semantics of underlying system calls.
|
||||
*/
|
||||
int openNoInt(const char* name, int flags, mode_t mode = 0666);
|
||||
int closeNoInt(int fd);
|
||||
|
||||
/**
|
||||
* Similar to readFull (declared below): a wrapper around write() that loops
* until all data is written. (The folly original this was ported from also
* covers pwrite(); only the write() variant is declared in this header.)
|
||||
*
|
||||
* Generally, the write() / pwrite() system call may always write fewer bytes
|
||||
* than requested, just like read(). In certain cases (such as when writing to
|
||||
* a pipe), POSIX provides stronger guarantees, but not in the general case.
|
||||
* For example, Linux (even on a 64-bit platform) won't write more than 2GB in
|
||||
* one write() system call.
|
||||
*
|
||||
* Note that writevFull and pwritevFull require iov to be non-const, unlike
|
||||
* writev and pwritev. The contents of iov after these functions return
|
||||
* is unspecified.
|
||||
*
|
||||
* These functions return -1 on error, or the total number of bytes written
|
||||
* (which is always the same as the number of requested bytes) on success.
|
||||
*/
|
||||
ssize_t writeFull(int fd, const void* buf, size_t count);
|
||||
|
||||
/**
|
||||
* Wrapper around read() (and pread()) that, in addition to retrying on
|
||||
* EINTR, will loop until all data is read.
|
||||
*
|
||||
* This wrapper is only useful for blocking file descriptors (for non-blocking
|
||||
* file descriptors, you have to be prepared to deal with incomplete reads
|
||||
* anyway), and only exists because POSIX allows read() to return an incomplete
|
||||
* read if interrupted by a signal (instead of returning -1 and setting errno
|
||||
* to EINTR).
|
||||
*
|
||||
* Note that this wrapper weakens the thread safety of read(): the file pointer
|
||||
* is shared between threads, but the system call is atomic. If multiple
|
||||
* threads are reading from a file at the same time, you don't know where your
|
||||
* data came from in the file, but you do know that the returned bytes were
|
||||
* contiguous. You can no longer make this assumption if using readFull().
|
||||
* You should probably use pread() when reading from the same file descriptor
|
||||
* from multiple threads simultaneously, anyway.
|
||||
*
|
||||
* Note that readvFull and preadvFull require iov to be non-const, unlike
|
||||
* readv and preadv. The contents of iov after these functions return
|
||||
* is unspecified.
|
||||
*/
|
||||
[[nodiscard]] ssize_t readFull(int fd, void* buf, size_t count);
|
||||
|
||||
/**
|
||||
* Read entire file (if num_bytes is defaulted) or no more than
|
||||
* num_bytes (otherwise) into container *out. The container is assumed
|
||||
* to be contiguous, with element size equal to 1, and offer size(),
|
||||
* reserve(), and random access (e.g. std::vector<char>, std::string,
|
||||
* fbstring).
|
||||
*
|
||||
* Returns: true on success or false on failure. In the latter case
|
||||
* errno will be set appropriately by the failing system primitive.
|
||||
*/
|
||||
template <class Container>
|
||||
bool readFile(
|
||||
int fd,
|
||||
Container& out,
|
||||
size_t num_bytes = std::numeric_limits<size_t>::max()) {
|
||||
static_assert(
|
||||
sizeof(out[0]) == 1,
|
||||
"readFile: only containers with byte-sized elements accepted");
|
||||
|
||||
size_t soFar = 0; // amount of bytes successfully read
|
||||
auto guard = c10::make_scope_exit([&]() {
|
||||
assert(out.size() >= soFar); // resize better doesn't throw
|
||||
out.resize(soFar);
|
||||
});
|
||||
|
||||
// Obtain file size:
|
||||
struct stat buf;
|
||||
if (fstat(fd, &buf) == -1) {
|
||||
return false;
|
||||
}
|
||||
// Some files (notably under /proc and /sys on Linux) lie about
|
||||
// their size, so treat the size advertised by fstat under advise
|
||||
// but don't rely on it. In particular, if the size is zero, we
|
||||
// should attempt to read stuff. If not zero, we'll attempt to read
|
||||
// one extra byte.
|
||||
constexpr size_t initialAlloc = 1024 * 4;
|
||||
out.resize(std::min(
|
||||
buf.st_size > 0 ? (size_t(buf.st_size) + 1) : initialAlloc, num_bytes));
|
||||
|
||||
while (soFar < out.size()) {
|
||||
const auto actual = readFull(fd, &out[soFar], out.size() - soFar);
|
||||
if (actual == -1) {
|
||||
return false;
|
||||
}
|
||||
soFar += actual;
|
||||
if (soFar < out.size()) {
|
||||
// File exhausted
|
||||
break;
|
||||
}
|
||||
// Ew, allocate more memory. Use exponential growth to avoid
|
||||
// quadratic behavior. Cap size to num_bytes.
|
||||
out.resize(std::min(out.size() * 3 / 2, num_bytes));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as above, but takes in a file name instead of fd
|
||||
*/
|
||||
template <class Container>
|
||||
bool readFile(
|
||||
const char* file_name,
|
||||
Container& out,
|
||||
size_t num_bytes = std::numeric_limits<size_t>::max()) {
|
||||
TORCH_CHECK(file_name);
|
||||
|
||||
const auto fd = openNoInt(file_name, O_RDONLY | O_CLOEXEC);
|
||||
if (fd == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto guard = c10::make_scope_exit([&]() {
|
||||
// Ignore errors when closing the file
|
||||
closeNoInt(fd);
|
||||
});
|
||||
|
||||
return readFile(fd, out, num_bytes);
|
||||
}
|
||||
|
||||
} // namespace torch::nativert
|
Reference in New Issue
Block a user