mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/73707 Add a helper function to detect the file format from the first bytes of a data file or stream. This will be necessary during the migration from Pickle-serialized modules to Flatbuffer-serialized modules. ghstack-source-id: 150384317 Test Plan: Existing tests for ZIP+Pickle continue to pass. New unit tests pass: ``` cd xplat && buck test //xplat/caffe2:test_lite_trainer //xplat/caffe2:test_lite_interpreter Building: finished in 26.6 sec (100%) 3180/3180 jobs, 571/3180 updated Total time: 32.2 sec Testing: finished in 07:08.3 min (89 PASS/0 FAIL) BUILD SUCCEEDED RESULTS FOR //xplat/caffe2:test_lite_interpreter //xplat/caffe2:test_lite_trainer PASS 421.1s 81 Passed 0 Skipped 0 Failed //xplat/caffe2:test_lite_interpreter PASS 103ms 8 Passed 0 Skipped 0 Failed //xplat/caffe2:test_lite_trainer TESTS PASSED ``` Reviewed By: iseeyuan Differential Revision: D34527859 fbshipit-source-id: ff2d1eabc2f8be1de2e44709c878e2d1a373f0df (cherry picked from commit 5c394848346ab9e374c9e7eed479ad70ed09a7ae)
125 lines
3.9 KiB
C++
125 lines
3.9 KiB
C++
#include <torch/csrc/jit/mobile/file_format.h>
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
#include <sstream>
|
|
|
|
// Tests go in torch::jit
|
|
namespace torch {
|
|
namespace jit {
|
|
|
|
TEST(FileFormatTest, IdentifiesFlatbufferStream) {
|
|
// Create data whose initial bytes look like a Flatbuffer stream.
|
|
std::stringstream data;
|
|
data << "abcd" // First four bytes don't matter.
|
|
<< "PTMF" // Magic string.
|
|
<< "efgh"; // Trailing bytes don't matter.
|
|
|
|
// The data should be identified as Flatbuffer.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::FlatbufferFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, IdentifiesZipStream) {
|
|
// Create data whose initial bytes look like a ZIP stream.
|
|
std::stringstream data;
|
|
data << "PK\x03\x04" // Magic string.
|
|
<< "abcd" // Trailing bytes don't matter.
|
|
<< "efgh";
|
|
|
|
// The data should be identified as ZIP.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::ZipFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, FlatbufferTakesPrecedence) {
|
|
// Since the Flatbuffer and ZIP magic bytes are at different offsets,
|
|
// the same data could be identified as both. Demonstrate that Flatbuffer
|
|
// takes precedence. (See details in file_format.h)
|
|
std::stringstream data;
|
|
data << "PK\x03\x04" // ZIP magic string.
|
|
<< "PTMF" // Flatbuffer magic string.
|
|
<< "abcd"; // Trailing bytes don't matter.
|
|
|
|
// The data should be identified as Flatbuffer.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::FlatbufferFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, HandlesUnknownStream) {
|
|
// Create data that doesn't look like any known format.
|
|
std::stringstream data;
|
|
data << "abcd"
|
|
<< "efgh"
|
|
<< "ijkl";
|
|
|
|
// The data should be classified as unknown.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::UnknownFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, ShortStreamIsUnknown) {
|
|
// Create data with fewer than kFileFormatHeaderSize (8) bytes.
|
|
std::stringstream data;
|
|
data << "ABCD";
|
|
|
|
// The data should be classified as unknown.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::UnknownFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, EmptyStreamIsUnknown) {
|
|
// Create an empty stream.
|
|
std::stringstream data;
|
|
|
|
// The data should be classified as unknown.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::UnknownFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, BadStreamIsUnknown) {
|
|
// Create a stream with valid Flatbuffer data.
|
|
std::stringstream data;
|
|
data << "abcd"
|
|
<< "PTMF" // Flatbuffer magic string.
|
|
<< "efgh";
|
|
|
|
// Demonstrate that the data would normally be identified as Flatbuffer.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::FlatbufferFileFormat);
|
|
|
|
// Mark the stream as bad, and demonstrate that it is in an error state.
|
|
data.setstate(std::stringstream::badbit);
|
|
// Demonstrate that the stream is in an error state.
|
|
EXPECT_FALSE(data.good());
|
|
|
|
// The data should now be classified as unknown.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::UnknownFileFormat);
|
|
}
|
|
|
|
TEST(FileFormatTest, StreamOffsetIsObservedAndRestored) {
|
|
// Create data with a Flatbuffer header at a non-zero offset into the stream.
|
|
std::stringstream data;
|
|
// Add initial padding.
|
|
data << "PADDING";
|
|
size_t offset = data.str().size();
|
|
// Add a valid Flatbuffer header.
|
|
data << "abcd"
|
|
<< "PTMF" // Flatbuffer magic string.
|
|
<< "efgh";
|
|
// Seek just after the padding.
|
|
data.seekg(static_cast<std::stringstream::off_type>(offset), data.beg);
|
|
// Demonstrate that the stream points to the beginning of the Flatbuffer data,
|
|
// not to the padding.
|
|
EXPECT_EQ(data.peek(), 'a');
|
|
|
|
// The data should be identified as Flatbuffer.
|
|
EXPECT_EQ(getFileFormat(data), FileFormat::FlatbufferFileFormat);
|
|
|
|
// The stream position should be where it was before identification.
|
|
EXPECT_EQ(offset, data.tellg());
|
|
}
|
|
|
|
TEST(FileFormatTest, HandlesMissingFile) {
|
|
// A missing file should be classified as unknown.
|
|
EXPECT_EQ(
|
|
getFileFormat("NON_EXISTENT_FILE_4965c363-44a7-443c-983a-8895eead0277"),
|
|
FileFormat::UnknownFileFormat);
|
|
}
|
|
|
|
} // namespace jit
|
|
} // namespace torch
|