mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Hi! I've been fuzzing different pytorch modules with with [sydr-fuzz](https://github.com/ispras/oss-sydr-fuzz/tree/master/projects/pytorch), and found a multiple crashes in torch::jit::load() function. All found errors could be reproduced with provided docker: [Dockerfile](https://github.com/ispras/oss-sydr-fuzz/tree/master/projects/pytorch). ### Crash in torch/csrc/jit/unpickler.cpp:1075 [crash-1f59083b8396c5b62b4705c7556e68f129e833b1.zip](https://github.com/pytorch/pytorch/files/11552947/crash-1f59083b8396c5b62b4705c7556e68f129e833b1.zip) ```asan "#0 0x00007ffff7a5600b in raise () from /lib/x86_64-linux-gnu/libc.so.6", "#1 0x00007ffff7a35859 in abort () from /lib/x86_64-linux-gnu/libc.so.6", "#2 0x00007ffff7ce3911 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#3 0x00007ffff7cef38c in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#4 0x00007ffff7cef3f7 in std::terminate() () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#5 0x00007ffff7cef6a9 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#6 0x00007ffff7ce6326 in std::__throw_length_error(char const*) () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#7 0x00007ffff7d87edc in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_create(unsigned long&, unsigned long) () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#8 0x00007ffff7d88880 in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::reserve(unsigned long) () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#9 0x000000000ea52931 in torch::jit::Unpickler::readBytes[abi:cxx11](unsigned long) (this=this@entry=0x7fffffffac10, length=length@entry=8358680908539635837) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:1075", "#10 0x000000000ea4c3a0 in torch::jit::Unpickler::readInstruction (this=0x7fffffff90d0) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:355", "#11 0x000000000ea49eb8 in torch::jit::Unpickler::run (this=0x7fffffffac10) at 
/pytorch/torch/csrc/jit/serialization/unpickler.cpp:251", "#12 0x000000000ea49b12 in torch::jit::Unpickler::parse_ivalue (this=0x7fffffffac10) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:204", "#13 0x000000000e960a9f in torch::jit::readArchiveAndTensors(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, c10::optional<std::function<c10::StrongTypePtr (c10::QualifiedName const&)> >, c10::optional<std::function<c10::intrusive_ptr<c10::ivalue::Object, c10::detail::intrusive_target_default_null_type<c10::ivalue::Object> > (c10::StrongTypePtr, c10::IValue)> >, c10::optional<c10::Device>, caffe2::serialize::PyTorchStreamReader&, c10::Type::SingletonOrSharedTypePtr<c10::Type> (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&), std::shared_ptr<torch::jit::DeserializationStorageContext>) (archive_name=..., pickle_prefix=..., tensor_prefix=..., type_resolver=..., obj_loader=..., device=..., stream_reader=..., type_parser=<optimized out>, storage_context=...) at /pytorch/torch/csrc/jit/serialization/import_read.cpp:53", "#14 0x000000000e8ef599 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive (this=0x7fffffffbc60, archive_name=...) 
at /pytorch/torch/csrc/jit/serialization/import.cpp:184", "#15 0x000000000e8eb886 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::deserialize (this=<optimized out>, device=..., extra_files=..., restore_shapes=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:287", "#16 0x000000000e8e9cc5 in torch::jit::import_ir_module (cu=..., in=..., device=..., extra_files=..., load_debug_files=<optimized out>, restore_shapes=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:386", "#17 0x000000000e8f37bf in torch::jit::import_ir_module (cu=..., in=..., device=..., load_debug_files=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:322", "#18 0x000000000e8f615a in torch::jit::load (in=..., device=..., load_debug_files=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:482", "#19 0x00000000005c2d61 in LLVMFuzzerTestOneInput (data=<optimized out>, size=1663) at /load.cc:42", "#20 0x00000000005c2a8e in ExecuteFilesOnyByOne (argc=2, argv=0x7fffffffc6b8, callback=callback@entry=0x5c2ae0 <LLVMFuzzerTestOneInput(uint8_t const*, size_t)>) at /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:255", "#21 0x00000000005c2899 in LLVMFuzzerRunDriver (argcp=argcp@entry=0x7fffffffc5b4, argvp=argvp@entry=0x7fffffffc5b8, callback=0x5c2ae0 <LLVMFuzzerTestOneInput(uint8_t const*, size_t)>) at /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:364", "#22 0x00000000005c2459 in main (argc=2, argv=0x7fffffffc6b8) at /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:300" ``` ### Crash in torch/csrc/jit/unpickler.cpp:386 [crash-2e9923de375c393e700e8c0441f0ebe8252ca364.zip](https://github.com/pytorch/pytorch/files/11552950/crash-2e9923de375c393e700e8c0441f0ebe8252ca364.zip) ```asan "#0 0x00007ffff7a5600b in raise () from /lib/x86_64-linux-gnu/libc.so.6", "#1 0x00007ffff7a35859 in abort () from /lib/x86_64-linux-gnu/libc.so.6", "#2 0x00007ffff7ce3911 in ?? 
() from /lib/x86_64-linux-gnu/libstdc++.so.6", "#3 0x00007ffff7cef38c in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#4 0x00007ffff7cef3f7 in std::terminate() () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#5 0x00007ffff7cef6a9 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#6 0x00007ffff7ce6326 in std::__throw_length_error(char const*) () from /lib/x86_64-linux-gnu/libstdc++.so.6", "#7 0x0000000000670aff in std::vector<c10::IValue, std::allocator<c10::IValue> >::reserve (this=this@entry=0x7fffffff9750, __n=__n@entry=18446744073709551614) at /usr/include/c++/10/bits/vector.tcc:70", "#8 0x000000000ea4d5cd in torch::jit::Unpickler::readInstruction (this=0x7fffffffac10) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:386", "#9 0x000000000ea49eb8 in torch::jit::Unpickler::run (this=0x7fffffffac10) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:251", "#10 0x000000000ea49b12 in torch::jit::Unpickler::parse_ivalue (this=0x7fffffffac10) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:204", "#11 0x000000000e960a9f in torch::jit::readArchiveAndTensors(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, c10::optional<std::function<c10::StrongTypePtr (c10::QualifiedName const&)> >, c10::optional<std::function<c10::intrusive_ptr<c10::ivalue::Object, c10::detail::intrusive_target_default_null_type<c10::ivalue::Object> > (c10::StrongTypePtr, c10::IValue)> >, c10::optional<c10::Device>, caffe2::serialize::PyTorchStreamReader&, c10::Type::SingletonOrSharedTypePtr<c10::Type> (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&), std::shared_ptr<torch::jit::DeserializationStorageContext>) (archive_name=..., pickle_prefix=..., tensor_prefix=..., type_resolver=..., obj_loader=..., device=..., 
stream_reader=..., type_parser=<optimized out>, storage_context=...) at /pytorch/torch/csrc/jit/serialization/import_read.cpp:53", "#12 0x000000000e8ef599 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive (this=0x7fffffffbc60, archive_name=...) at /pytorch/torch/csrc/jit/serialization/import.cpp:184", "#13 0x000000000e8eb886 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::deserialize (this=<optimized out>, device=..., extra_files=..., restore_shapes=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:287", "#14 0x000000000e8e9cc5 in torch::jit::import_ir_module (cu=..., in=..., device=..., extra_files=..., load_debug_files=<optimized out>, restore_shapes=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:386", "#15 0x000000000e8f37bf in torch::jit::import_ir_module (cu=..., in=..., device=..., load_debug_files=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:322", "#16 0x000000000e8f615a in torch::jit::load (in=..., device=..., load_debug_files=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:482", "#17 0x00000000005c2d61 in LLVMFuzzerTestOneInput (data=<optimized out>, size=5498) at /load.cc:42", "#18 0x00000000005c2a8e in ExecuteFilesOnyByOne (argc=2, argv=0x7fffffffc6b8, callback=callback@entry=0x5c2ae0 <LLVMFuzzerTestOneInput(uint8_t const*, size_t)>) at /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:255", "#19 0x00000000005c2899 in LLVMFuzzerRunDriver (argcp=argcp@entry=0x7fffffffc5b4, argvp=argvp@entry=0x7fffffffc5b8, callback=0x5c2ae0 <LLVMFuzzerTestOneInput(uint8_t const*, size_t)>) at /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:364", "#20 0x00000000005c2459 in main (argc=2, argv=0x7fffffffc6b8) at /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:300" ``` ### Crash in torch/csrc/jit/serialization/source_range_serialization.cpp:211 
[crash-5598d386057152f606bfa69d85605499e8852625.zip](https://github.com/pytorch/pytorch/files/11552952/crash-5598d386057152f606bfa69d85605499e8852625.zip) ```asan "#0 torch::jit::ConcreteSourceRangeUnpickler::unpickle (this=0x99b8d80) at /pytorch/torch/csrc/jit/serialization/source_range_serialization.cpp:211", "#1 0x0000000004042566 in torch::jit::ConcreteSourceRangeUnpickler::findSourceRangeThatGenerated (this=0x99aa1c0, range=...) at /pytorch/torch/csrc/jit/serialization/source_range_serialization.cpp:229", "#2 0x00000000007b5cc8 in torch::jit::Source::findSourceRangeThatGenerated (this=<optimized out>, range=...) at /pytorch/torch/csrc/jit/frontend/source_range.cpp:144", "#3 torch::jit::SourceRange::findSourceRangeThatGenerated (this=0x7fffffffa650) at /pytorch/torch/csrc/jit/frontend/source_range.h:384", "#4 torch::jit::SourceRange::highlight (this=0x7fffffffa650, out=...) at /pytorch/torch/csrc/jit/frontend/source_range.cpp:149", "#5 0x00000000007a0e74 in torch::jit::Lexer::expected (this=this@entry=0x99979a0, what=..., t=...) 
at /pytorch/torch/csrc/jit/frontend/lexer.h:461", "#6 0x000000000079fcaa in torch::jit::Lexer::lexRaw (this=this@entry=0x99979a0, whitespace_token=false) at /pytorch/torch/csrc/jit/frontend/lexer.h:552", "#7 0x000000000079fd23 in torch::jit::Lexer::lex (this=this@entry=0x99979a0) at /pytorch/torch/csrc/jit/frontend/lexer.h:487", "#8 0x00000000007a1da1 in torch::jit::Lexer::next (this=this@entry=0x99979a0) at /pytorch/torch/csrc/jit/frontend/lexer.h:436", "#9 0x0000000003bff6a8 in torch::jit::Lexer::nextIf (this=0x99979a0, kind=330) at /pytorch/torch/csrc/jit/frontend/lexer.h:444", "#10 torch::jit::ParserImpl::parseReturnAnnotation (this=this@entry=0x99979a0) at /pytorch/torch/csrc/jit/frontend/parser.cpp:703", "#11 0x0000000003bfd500 in torch::jit::ParserImpl::parseDecl (this=this@entry=0x99979a0) at /pytorch/torch/csrc/jit/frontend/parser.cpp:729", "#12 0x0000000003bfb725 in torch::jit::ParserImpl::parseFunction (this=this@entry=0x99979a0, is_method=true) at /pytorch/torch/csrc/jit/frontend/parser.cpp:755", "#13 0x0000000003bfdc28 in torch::jit::ParserImpl::parseStmt (this=this@entry=0x99979a0, in_class=<optimized out>) at /pytorch/torch/csrc/jit/frontend/parser.cpp:599", "#14 0x0000000003bfd8dd in torch::jit::ParserImpl::parseStatements (this=this@entry=0x99979a0, expect_indent=<optimized out>, in_class=<optimized out>) at /pytorch/torch/csrc/jit/frontend/parser.cpp:697", "#15 0x0000000003bfc4ba in torch::jit::ParserImpl::parseClass (this=0x99979a0) at /pytorch/torch/csrc/jit/frontend/parser.cpp:747", "#16 0x0000000003bfaddc in torch::jit::Parser::parseClass (this=<optimized out>) at /pytorch/torch/csrc/jit/frontend/parser.cpp:812", "#17 0x0000000004008e2d in torch::jit::SourceImporterImpl::parseSourceIfNeeded (this=this@entry=0x95d41f0, qualifier=...) at /pytorch/torch/csrc/jit/serialization/import_source.cpp:182", "#18 0x0000000004008ab7 in torch::jit::SourceImporterImpl::findNamedType (this=this@entry=0x95d41f0, name=...) 
at /pytorch/torch/csrc/jit/serialization/import_source.cpp:135", "#19 0x000000000400d010 in torch::jit::SourceImporterImpl::resolveType (this=0x95d41f0, name=..., loc=...) at /pytorch/torch/csrc/jit/serialization/import_source.cpp:261", "#20 0x0000000003c20821 in torch::jit::ScriptTypeParser::parseTypeFromExpr (this=this@entry=0x7fffffffb658, expr=...) at /pytorch/torch/csrc/jit/frontend/script_type_parser.cpp:238", "#21 0x0000000003c20acc in torch::jit::ScriptTypeParser::parseType (this=0x7fffffffb658, str=...) at /pytorch/torch/csrc/jit/frontend/script_type_parser.cpp:312", "#22 0x0000000004019416 in torch::jit::SourceImporter::loadType (this=<optimized out>, name=...) at /pytorch/torch/csrc/jit/serialization/import_source.cpp:786", "#23 0x0000000003ff365e in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0::operator()(c10::QualifiedName const&) const (this=<optimized out>, qn=...) at /pytorch/torch/csrc/jit/serialization/import.cpp:146", "#24 std::__invoke_impl<c10::StrongTypePtr, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&>(std::__invoke_other, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&) (__f=..., __args=...) 
at /usr/include/c++/10/bits/invoke.h:60", "#25 std::__invoke_r<c10::StrongTypePtr, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&>(torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&) (__fn=..., __args=...) at /usr/include/c++/10/bits/invoke.h:113", "#26 std::_Function_handler<c10::StrongTypePtr (c10::QualifiedName const&), torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0>::_M_invoke(std::_Any_data const&, c10::QualifiedName const&) (__functor=..., __args=...) at /usr/include/c++/10/bits/std_function.h:291", "#27 0x000000000404e5c4 in std::function<c10::StrongTypePtr (c10::QualifiedName const&)>::operator()(c10::QualifiedName const&) const (this=0x7fffffffbf28, __args=...) at /usr/include/c++/10/bits/std_function.h:622", "#28 torch::jit::Unpickler::readGlobal (this=this@entry=0x7fffffffbd50, module_name=..., class_name=...) 
at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:820", "#29 0x0000000004049ce5 in torch::jit::Unpickler::readInstruction (this=this@entry=0x7fffffffbd50) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:496", "#30 0x00000000040497a8 in torch::jit::Unpickler::run (this=0x7fffffffbd50) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:251", "#31 0x00000000040494f9 in torch::jit::Unpickler::parse_ivalue (this=0x99aa1c0) at /pytorch/torch/csrc/jit/serialization/unpickler.cpp:204", "#32 0x00000000040075f8 in torch::jit::readArchiveAndTensors(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, c10::optional<std::function<c10::StrongTypePtr (c10::QualifiedName const&)> >, c10::optional<std::function<c10::intrusive_ptr<c10::ivalue::Object, c10::detail::intrusive_target_default_null_type<c10::ivalue::Object> > (c10::StrongTypePtr, c10::IValue)> >, c10::optional<c10::Device>, caffe2::serialize::PyTorchStreamReader&, c10::Type::SingletonOrSharedTypePtr<c10::Type> (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&), std::shared_ptr<torch::jit::DeserializationStorageContext>) (archive_name=..., pickle_prefix=..., tensor_prefix=..., type_resolver=..., obj_loader=..., device=..., stream_reader=..., type_parser=0x0, storage_context=...) at /pytorch/torch/csrc/jit/serialization/import_read.cpp:53", "#33 0x0000000003ff3545 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive (this=this@entry=0x7fffffffc2b8, archive_name=...) 
at /pytorch/torch/csrc/jit/serialization/import.cpp:184", "#34 0x0000000003fed8bf in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::deserialize (this=this@entry=0x7fffffffc2b8, device=device@entry=..., extra_files=..., restore_shapes=220) at /pytorch/torch/csrc/jit/serialization/import.cpp:287", "#35 0x0000000003febb0f in torch::jit::import_ir_module (cu=..., in=..., device=..., device@entry=..., extra_files=..., load_debug_files=true, restore_shapes=<optimized out>) at /pytorch/torch/csrc/jit/serialization/import.cpp:386", "#36 0x0000000003feb7a1 in torch::jit::import_ir_module (cu=..., in=..., device=..., device@entry=..., load_debug_files=false) at /pytorch/torch/csrc/jit/serialization/import.cpp:322", "#37 0x0000000003ff015a in torch::jit::load (in=..., device=device@entry=..., load_debug_files=true) at /pytorch/torch/csrc/jit/serialization/import.cpp:482", "#38 0x00000000004a1655 in LLVMFuzzerTestOneInput (data=0x981a680 \"PK\\003\\004\", size=1609) at /load.cc:42", "#39 0x00000000004a1dbf in main ()" ``` ### Segmentation fault in /pytorch/aten/src/ATen/core/ivalue.h:526 [crash-9bd059c1ae85ab9cdb41d786932214d942baa189.zip](https://github.com/pytorch/pytorch/files/11552956/crash-9bd059c1ae85ab9cdb41d786932214d942baa189.zip) ```asan "==8528==ERROR: AddressSanitizer: SEGV on unknown address (pc 0x00000e55d97e bp 0x7fffffffb4d0 sp 0x7fffffffb360 T0)", "==8528==The signal is caused by a READ memory access.", "==8528==Hint: this fault was caused by a dereference of a high value address (see register values below). 
Disassemble the provided pc to learn which register was used.", " #0 0xe55d97e in c10::IValue::isTuple() const /pytorch/aten/src/ATen/core/ivalue.h:526:26", " #1 0xe55d97e in torch::distributed::rpc::GloballyUniqueId::fromIValue(c10::IValue const&) /pytorch/torch/csrc/distributed/rpc/types.cpp:60:3", " #2 0xe4b04fb in torch::distributed::rpc::ScriptRemoteCall::fromIValues(std::vector<c10::IValue, std::allocator<c10::IValue> >&) /pytorch/torch/csrc/distributed/rpc/script_remote_call.cpp:33:20", " #3 0xe4b1ed5 in torch::distributed::rpc::ScriptRemoteCall::fromMessage(torch::distributed::rpc::Message const&) /pytorch/torch/csrc/distributed/rpc/script_remote_call.cpp:80:10", " #4 0xe55f8a0 in torch::distributed::rpc::deserializeRequest(torch::distributed::rpc::Message const&) /pytorch/torch/csrc/distributed/rpc/utils.cpp:108:14", " #5 0x6120a8 in LLVMFuzzerTestOneInput /message_deserialize.cc:192:27", " #6 0x535de1 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:611:15", " #7 0x51fcec in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:324:6", " #8 0x525a3b in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:860:9", " #9 0x54eff2 in main /llvm-project-llvmorg-14.0.6/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10", " #10 0x7ffff7a37082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee)", " #11 0x51a60d in _start (/message_deserialize_fuzz+0x51a60d)", "", "AddressSanitizer can not provide additional info.", "SUMMARY: AddressSanitizer: SEGV /pytorch/aten/src/ATen/core/ivalue.h:526:26 in c10::IValue::isTuple() const", "==8528==ABORTING" ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/102156 Approved by: 
https://github.com/ezyang
257 lines
8.4 KiB
C++
257 lines
8.4 KiB
C++
#include <torch/csrc/jit/serialization/source_range_serialization.h>
|
|
#include <torch/csrc/jit/serialization/source_range_serialization_impl.h>
|
|
|
|
#include <c10/util/Exception.h>
|
|
#include <c10/util/Flags.h>
|
|
#include <torch/csrc/jit/mobile/type_parser.h>
|
|
#include <torch/csrc/jit/serialization/pickle.h>
|
|
#include <algorithm>
|
|
|
|
namespace torch::jit {
|
|
|
|
// Per-thread switch for the debug_pkl layout emitted when a model is saved to
// a .pt file. When true the compact string-table layout is written; the
// compact file is smaller but cannot be loaded by old torch binaries.
// TODO(qihan) remove when all binaries are using string table.
thread_local bool should_use_format_with_string_table_{true};
|
|
|
|
class SourceRangeSerializer {
|
|
public:
|
|
// Serialize SourceRange as Tuple[SourceType, int, int]
|
|
// where SourceType = Tuple[int, int, int, List[int]],
|
|
// The first 2 ints are positions into the vector returned by textSaved
|
|
// after all the Ranges are processed. textSaved() returns a vector of str
|
|
// the serialized form of Source
|
|
c10::IValue serialize(const SourceRange& sr);
|
|
|
|
const std::vector<c10::IValue>& texts_saved() {
|
|
return texts_;
|
|
}
|
|
|
|
SourceRangeSerializer() {
|
|
texts_.emplace_back("");
|
|
text_to_idx_[texts_.back().toStringRef()] = 0;
|
|
}
|
|
|
|
private:
|
|
// Serialize Source as Tuple[str, Optional[str], int, List[int]]
|
|
// This caches serialized sources, since many SourceRanges can
|
|
// refer to the same one.
|
|
c10::IValue serialize_source(const std::shared_ptr<Source>& s);
|
|
std::unordered_map<std::shared_ptr<Source>, c10::IValue> serialized_sources;
|
|
|
|
int64_t store_text_and_get_index(const std::string& text_view);
|
|
|
|
std::vector<c10::IValue> texts_;
|
|
std::unordered_map<c10::string_view, int64_t> text_to_idx_;
|
|
};
|
|
|
|
// Rebuilds a SourceRange from `iv`, which must be a tuple of exactly three
// elements: (serialized source, start, end). The source element is decoded by
// deserialize_source; start and end are read as ints.
SourceRange SourceRangeDeserializer::deserialize(const c10::IValue& iv) {
  const auto& tup_elems = iv.toTupleRef().elements();
  TORCH_INTERNAL_ASSERT(tup_elems.size() == 3);

  // Decode in field order: source first, then the two offsets.
  std::shared_ptr<Source> origin = deserialize_source(tup_elems[0]);
  const int64_t range_begin = tup_elems[1].toInt();
  const int64_t range_end = tup_elems[2].toInt();

  return SourceRange(origin, range_begin, range_end);
}
|
|
|
|
std::shared_ptr<Source> SourceRangeDeserializer::deserialize_source(
|
|
const c10::IValue& iv) {
|
|
auto tup = iv.toTuple();
|
|
auto it = cached_sources.find(tup);
|
|
if (it != cached_sources.end()) {
|
|
return it->second;
|
|
}
|
|
std::shared_ptr<Source> source;
|
|
const auto& tup_elems = tup->elements();
|
|
TORCH_INTERNAL_ASSERT(tup_elems.size() == 3);
|
|
if (!text_table_.empty()) {
|
|
const auto& textIndex = tup_elems[0].toIntList();
|
|
int64_t fnameIndex = tup_elems[1].toInt();
|
|
int64_t starting_line_no_ = tup_elems[2].toInt();
|
|
c10::optional<std::string> filename = c10::nullopt;
|
|
|
|
filename = *text_table_[fnameIndex];
|
|
|
|
std::vector<c10::string_view> pieces;
|
|
std::vector<std::shared_ptr<std::string>> strs;
|
|
|
|
for (int64_t i : textIndex) {
|
|
pieces.emplace_back(*text_table_[i]);
|
|
strs.emplace_back(text_table_[i]);
|
|
}
|
|
|
|
StringCordView str_cord(std::move(pieces), std::move(strs));
|
|
|
|
source = std::make_shared<Source>(str_cord, filename, starting_line_no_);
|
|
} else {
|
|
std::string text_ = tup_elems[0].toStringRef();
|
|
c10::optional<std::string> filename_ =
|
|
tup_elems[1].toOptional<std::string>();
|
|
int64_t starting_line_no_ = tup_elems[2].toInt();
|
|
source = std::make_shared<Source>(
|
|
std::move(text_), std::move(filename_), starting_line_no_);
|
|
}
|
|
cached_sources[tup] = source;
|
|
return source;
|
|
}
|
|
|
|
// Serializes `sr` as a 3-element tuple: the serialized form of its source
// followed by its start and end offsets as 64-bit ints.
c10::IValue SourceRangeSerializer::serialize(const SourceRange& sr) {
  c10::IValue source_iv = serialize_source(sr.source());
  return c10::ivalue::Tuple::create(
      std::move(source_iv),
      static_cast<int64_t>(sr.start()),
      static_cast<int64_t>(sr.end()));
}
|
|
|
|
int64_t SourceRangeSerializer::store_text_and_get_index(
|
|
const std::string& text_view) {
|
|
auto text_iter = text_to_idx_.find(text_view);
|
|
if (text_iter == text_to_idx_.end()) {
|
|
int64_t text_pos = static_cast<int64_t>(texts_.size());
|
|
texts_.emplace_back(text_view);
|
|
text_to_idx_[texts_.back().toStringView()] = text_pos;
|
|
return text_pos;
|
|
} else {
|
|
return text_iter->second;
|
|
}
|
|
}
|
|
|
|
// Serializes `s` as a 3-element tuple and caches the result per Source
// pointer (a null pointer is cached like any other key).
//
// When should_use_format_with_string_table_ is set, the tuple is
// (List[int] per-line text indices, int filename index, int starting line);
// a null source becomes (empty list, 0, 0) and a source without a filename
// uses filename index 0. Otherwise the tuple is
// (str text, Optional[str] filename, int starting line); a null source
// becomes ("", "", 0).
c10::IValue SourceRangeSerializer::serialize_source(
    const std::shared_ptr<Source>& s) {
  const auto cached = serialized_sources.find(s);
  if (cached != serialized_sources.end()) {
    return cached->second;
  }

  c10::intrusive_ptr<c10::ivalue::Tuple> serialized;
  c10::List<int64_t> lines;
  if (!should_use_format_with_string_table_) {
    if (s != nullptr) {
      serialized = c10::ivalue::Tuple::create(
          {s->text_str().str(),
           s->filename(),
           static_cast<int64_t>(s->starting_line_no())});
    } else {
      serialized = c10::ivalue::Tuple::create({"", "", 0});
    }
  } else if (s != nullptr) {
    // Lines are stored before the filename, which fixes the order in which
    // new entries are appended to the text table.
    for (size_t lineno = 0; lineno < s->num_lines(); ++lineno) {
      const std::string line_content = s->get_line(lineno).str();
      lines.push_back(store_text_and_get_index(line_content));
    }

    int64_t fname_pos = 0;
    if (s->filename().has_value()) {
      fname_pos = store_text_and_get_index(*s->filename());
    }
    serialized = c10::ivalue::Tuple::create(
        {lines, fname_pos, static_cast<int64_t>(s->starting_line_no())});
  } else {
    serialized = c10::ivalue::Tuple::create({lines, 0, 0});
  }

  serialized_sources[s] = serialized;
  return serialized;
}
|
|
|
|
// Creates the pickler together with a fresh SourceRangeSerializer, held in
// `srs`.
SourceRangePickler::SourceRangePickler() : srs(new SourceRangeSerializer()) {}
|
|
|
|
std::vector<char> SourceRangePickler::pickle(
|
|
const SourceRangeRecords& ranges,
|
|
const SourceRangeTagMap& source_range_tags) {
|
|
std::vector<c10::IValue> ivalues;
|
|
for (const auto& range : ranges) {
|
|
int64_t source_range_tag{-1};
|
|
const auto& it = source_range_tags.find(range.range);
|
|
if (it != source_range_tags.end()) {
|
|
source_range_tag = it->second;
|
|
}
|
|
|
|
ivalues.emplace_back(c10::ivalue::Tuple::create(
|
|
{(int64_t)range.bytes,
|
|
srs->serialize(range.range),
|
|
static_cast<int64_t>(source_range_tag)}));
|
|
}
|
|
|
|
std::vector<at::Tensor> table;
|
|
auto textTable = c10::ivalue::Tuple::create(srs->texts_saved());
|
|
auto ivalue = c10::ivalue::Tuple::create(std::move(ivalues));
|
|
std::vector<char> result;
|
|
if (should_use_format_with_string_table_) {
|
|
result = jit::pickle(
|
|
c10::ivalue::Tuple::create({kFormatWithStringTable, textTable, ivalue}),
|
|
&table);
|
|
} else {
|
|
result = jit::pickle(ivalue, &table);
|
|
}
|
|
TORCH_CHECK(table.empty(), "Expected 0 tensors to be written");
|
|
return result;
|
|
}
|
|
|
|
// Takes ownership of the pickled buffer `data` of `size` bytes. Nothing is
// decoded here: `deserializer` and `unpickled_records` start out null and are
// populated by unpickle().
ConcreteSourceRangeUnpickler::ConcreteSourceRangeUnpickler(
    at::DataPtr&& data,
    size_t size)
    : data(std::move(data)),
      size(size),
      deserializer(nullptr),
      unpickled_records(nullptr) {}
|
|
|
|
void ConcreteSourceRangeUnpickler::unpickle() {
|
|
std::lock_guard<std::mutex> guard(mutex);
|
|
if (unpickled_records) {
|
|
return;
|
|
}
|
|
|
|
auto ivaluesTuple = jit::unpickle(
|
|
reinterpret_cast<const char*>(data.get()),
|
|
size,
|
|
nullptr,
|
|
{},
|
|
c10::parseType)
|
|
.toTuple();
|
|
|
|
const auto& ivalues = ivaluesTuple->elements();
|
|
TORCH_CHECK(
|
|
ivalues.size(), "Invalid unpickle operation: empty ivalues tuple");
|
|
unpickled_records = std::make_shared<SourceRangeRecords>();
|
|
IValue lines;
|
|
if (ivalues[0].isString() &&
|
|
kFormatWithStringTable == ivalues[0].toStringRef()) {
|
|
deserializer.reset(new SourceRangeDeserializer(ivalues[1]));
|
|
lines = ivalues[2];
|
|
} else {
|
|
deserializer.reset(new SourceRangeDeserializer());
|
|
lines = ivaluesTuple;
|
|
}
|
|
for (auto& val : lines.toTuple()->elements()) {
|
|
const auto& tup_elems = val.toTupleRef().elements();
|
|
int64_t offset = tup_elems[kByteOffsetIndex].toInt();
|
|
auto source_range = deserializer->deserialize(tup_elems[kSourceRangeIndex]);
|
|
unpickled_records->emplace_back(offset, std::move(source_range));
|
|
}
|
|
}
|
|
|
|
// Looks up the recorded range for `range.start()`.
//
// Calls unpickle() first, then searches the records with std::upper_bound,
// comparing on `bytes`, and returns the range of the record just before the
// first one whose `bytes` is greater than `range.start()`. Returns nullopt
// when there is no such preceding record.
c10::optional<SourceRange> ConcreteSourceRangeUnpickler::
    findSourceRangeThatGenerated(const SourceRange& range) {
  unpickle();

  const auto by_offset = [](const TaggedRange& lhs,
                            const TaggedRange& rhs) -> bool {
    return lhs.bytes < rhs.bytes;
  };
  const auto probe = TaggedRange(range.start(), SourceRange{});
  const auto first_after = std::upper_bound(
      unpickled_records->begin(), unpickled_records->end(), probe, by_offset);

  // upper_bound yields the first element *greater than* the probe, so the
  // match, if any, is the element immediately before it.
  if (first_after == unpickled_records->begin()) {
    return c10::nullopt;
  }
  return (first_after - 1)->range;
}
|
|
|
|
// Sets should_use_format_with_string_table_, the flag the serializer and
// pickler in this file consult to choose between the string-table layout and
// the plain layout. The flag is thread_local, so the change applies to the
// calling thread only.
TORCH_API void setShouldUseFormatWithStringTable(
    bool should_use_format_with_string_table) {
  should_use_format_with_string_table_ = should_use_format_with_string_table;
}
|
|
|
|
} // namespace torch::jit
|