|
|
|
@ -1,386 +0,0 @@
|
|
|
|
|
#include <google/protobuf/util/json_util.h>
|
|
|
|
|
#include <google/protobuf/util/type_resolver_util.h>
|
|
|
|
|
|
|
|
|
|
#include <torch/csrc/jit/frontend/script_type_parser.h>
|
|
|
|
|
#include <torch/csrc/jit/serialization/import_export_helpers.h>
|
|
|
|
|
#include <torch/csrc/jit/serialization/import_legacy.h>
|
|
|
|
|
#include <torch/csrc/jit/serialization/import_source.h>
|
|
|
|
|
#include <torch/csrc/jit/serialization/pickle.h>
|
|
|
|
|
#include <torch/csrc/jit/serialization/source_range_serialization.h>
|
|
|
|
|
#include <torch/csrc/jit/serialization/source_range_serialization_impl.h>
|
|
|
|
|
|
|
|
|
|
#include <caffe2/core/common.h>
|
|
|
|
|
#include <caffe2/core/types.h>
|
|
|
|
|
#include <caffe2/proto/caffe2_pb.h>
|
|
|
|
|
#include <caffe2/proto/torch.pb.h>
|
|
|
|
|
#include <caffe2/serialize/inline_container.h>
|
|
|
|
|
|
|
|
|
|
#include <ATen/ATen.h>
|
|
|
|
|
#include <c10/util/irange.h>
|
|
|
|
|
|
|
|
|
|
namespace torch::jit {
|
|
|
|
|
|
|
|
|
|
using caffe2::serialize::PyTorchStreamReader;
|
|
|
|
|
void postSetStateValidate(const IValue& v);
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
struct ClassResolver : public Resolver {
|
|
|
|
|
explicit ClassResolver(const SourceImporter& source_importer)
|
|
|
|
|
: source_importer_(source_importer) {}
|
|
|
|
|
TypePtr resolveType(const std::string& name, const SourceRange& loc)
|
|
|
|
|
override {
|
|
|
|
|
return source_importer_.loadType(c10::QualifiedName(name));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
SourceImporter source_importer_;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
class ScriptModuleDeserializer final {
|
|
|
|
|
public:
|
|
|
|
|
ScriptModuleDeserializer(
|
|
|
|
|
std::shared_ptr<CompilationUnit> cu,
|
|
|
|
|
std::shared_ptr<PyTorchStreamReader> reader,
|
|
|
|
|
const std::optional<at::Device>& device)
|
|
|
|
|
: compilation_unit_(std::move(cu)),
|
|
|
|
|
reader_(std::move(reader)),
|
|
|
|
|
device_(device),
|
|
|
|
|
source_importer_(
|
|
|
|
|
compilation_unit_,
|
|
|
|
|
&constant_table_,
|
|
|
|
|
[this](const std::string& qualifier) {
|
|
|
|
|
return findSourceInArchiveFromQualifier(
|
|
|
|
|
*reader_, export_prefix_, qualifier);
|
|
|
|
|
},
|
|
|
|
|
reader_->version()) {
|
|
|
|
|
for (auto& constant : constant_table_) {
|
|
|
|
|
TORCH_INTERNAL_ASSERT(constant.isTensor(), " expected a tensor");
|
|
|
|
|
tensor_table_.emplace_back(std::move(constant).toTensor());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Module LEGACY_deserialize();
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
at::Tensor LEGACY_loadTensor(
|
|
|
|
|
const torch::TensorDef& tensor_proto,
|
|
|
|
|
std::unordered_map<std::string, at::Storage>& storageMap);
|
|
|
|
|
void LEGACY_loadTensorTable(torch::ModelDef* model_def);
|
|
|
|
|
void LEGACY_moduleSetState(const Module& module, IValue state);
|
|
|
|
|
IValue LEGACY_loadPickleArchive(const std::string& name);
|
|
|
|
|
Module LEGACY_convertModule(const torch::ModuleDef& module_def);
|
|
|
|
|
|
|
|
|
|
std::vector<IValue> LEGACY_pickled_ivalues_;
|
|
|
|
|
std::vector<std::string> LEGACY_moduleStack_;
|
|
|
|
|
|
|
|
|
|
std::shared_ptr<Source> sourceLoader(const std::string& qualifier);
|
|
|
|
|
|
|
|
|
|
std::shared_ptr<CompilationUnit> compilation_unit_;
|
|
|
|
|
std::shared_ptr<PyTorchStreamReader> reader_;
|
|
|
|
|
std::optional<at::Device> device_;
|
|
|
|
|
// Legacy only tensor can be a constant.
|
|
|
|
|
std::vector<at::IValue> constant_table_;
|
|
|
|
|
std::vector<at::Tensor> tensor_table_;
|
|
|
|
|
SourceImporter source_importer_;
|
|
|
|
|
std::string export_prefix_ = "code/";
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
Module ScriptModuleDeserializer::LEGACY_deserialize() {
|
|
|
|
|
torch::ModelDef model_def;
|
|
|
|
|
|
|
|
|
|
auto [data_ptr, data_size] = reader_->getRecord("model.json");
|
|
|
|
|
// NB: cannot use JsonStringToMessage, since fbcode's protobuf is too old
|
|
|
|
|
// be consistent with JsonStringToMessage
|
|
|
|
|
std::string url_prefix = "type.googleapis.com";
|
|
|
|
|
std::unique_ptr<::google::protobuf::util::TypeResolver> resolver(
|
|
|
|
|
::google::protobuf::util::NewTypeResolverForDescriptorPool(
|
|
|
|
|
url_prefix, model_def.GetDescriptor()->file()->pool()));
|
|
|
|
|
std::string json_string = std::string(
|
|
|
|
|
static_cast<char*>(data_ptr.get()),
|
|
|
|
|
static_cast<char*>(data_ptr.get()) + data_size);
|
|
|
|
|
std::string binary_string;
|
|
|
|
|
::google::protobuf::util::JsonParseOptions opts;
|
|
|
|
|
opts.ignore_unknown_fields = true;
|
|
|
|
|
auto convert_result = ::google::protobuf::util::JsonToBinaryString(
|
|
|
|
|
resolver.get(),
|
|
|
|
|
url_prefix + "/" + model_def.GetDescriptor()->full_name(),
|
|
|
|
|
json_string,
|
|
|
|
|
&binary_string,
|
|
|
|
|
opts);
|
|
|
|
|
if (!convert_result.ok()) {
|
|
|
|
|
std::stringstream ss;
|
|
|
|
|
ss << convert_result;
|
|
|
|
|
AT_ERROR(ss.str());
|
|
|
|
|
}
|
|
|
|
|
AT_ASSERTM(
|
|
|
|
|
model_def.ParseFromString(binary_string),
|
|
|
|
|
"JSON transcoder produced invalid protobuf output.");
|
|
|
|
|
auto proto_version = model_def.proto_version();
|
|
|
|
|
export_prefix_ = "libs/";
|
|
|
|
|
|
|
|
|
|
LEGACY_loadTensorTable(&model_def);
|
|
|
|
|
AT_ASSERT(proto_version < 6);
|
|
|
|
|
if (proto_version == 2) {
|
|
|
|
|
const auto& list = LEGACY_loadPickleArchive("attributes.pkl").toList();
|
|
|
|
|
LEGACY_pickled_ivalues_.insert(
|
|
|
|
|
LEGACY_pickled_ivalues_.end(), list.begin(), list.end());
|
|
|
|
|
} else if (proto_version >= 3) {
|
|
|
|
|
LEGACY_pickled_ivalues_ =
|
|
|
|
|
std::move(*LEGACY_loadPickleArchive("attributes.pkl").toTuple())
|
|
|
|
|
.elements()
|
|
|
|
|
.vec();
|
|
|
|
|
}
|
|
|
|
|
LEGACY_moduleStack_.emplace_back("__torch__");
|
|
|
|
|
const auto& module_def = model_def.main_module();
|
|
|
|
|
|
|
|
|
|
// Move tensors in constant table.
|
|
|
|
|
for (auto& tensor : tensor_table_) {
|
|
|
|
|
constant_table_.emplace_back(IValue(std::move(tensor)));
|
|
|
|
|
}
|
|
|
|
|
return LEGACY_convertModule(module_def);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
IValue ScriptModuleDeserializer::LEGACY_loadPickleArchive(
|
|
|
|
|
const std::string& name) {
|
|
|
|
|
auto [attributes_ptr, attributes_size] = reader_->getRecord(name);
|
|
|
|
|
auto ivalue = unpickle(
|
|
|
|
|
reinterpret_cast<const char*>(attributes_ptr.get()),
|
|
|
|
|
attributes_size,
|
|
|
|
|
[&](const c10::QualifiedName& qn) {
|
|
|
|
|
auto cls = source_importer_.loadType(qn)->expect<ClassType>();
|
|
|
|
|
return c10::StrongTypePtr(compilation_unit_, std::move(cls));
|
|
|
|
|
},
|
|
|
|
|
tensor_table_);
|
|
|
|
|
return ivalue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ScriptModuleDeserializer::LEGACY_loadTensorTable(
|
|
|
|
|
torch::ModelDef* model_def) {
|
|
|
|
|
std::unordered_map<std::string, at::Storage> storageMap;
|
|
|
|
|
for (const torch::TensorDef& tensor : model_def->tensors()) {
|
|
|
|
|
tensor_table_.emplace_back(LEGACY_loadTensor(tensor, storageMap));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
at::Tensor ScriptModuleDeserializer::LEGACY_loadTensor(
|
|
|
|
|
const torch::TensorDef& tensor_proto,
|
|
|
|
|
std::unordered_map<std::string, at::Storage>& storageMap) {
|
|
|
|
|
std::vector<int64_t> dims(
|
|
|
|
|
tensor_proto.dims().begin(), tensor_proto.dims().end());
|
|
|
|
|
std::vector<int64_t> strides(
|
|
|
|
|
tensor_proto.strides().begin(), tensor_proto.strides().end());
|
|
|
|
|
auto type = at::typeMetaToScalarType(
|
|
|
|
|
caffe2::DataTypeToTypeMeta(tensor_proto.data_type()));
|
|
|
|
|
if (tensor_proto.is_quantized()) {
|
|
|
|
|
type = toQIntType(type);
|
|
|
|
|
}
|
|
|
|
|
const std::string& record_key = tensor_proto.data().key();
|
|
|
|
|
AT_ASSERT(tensor_proto.has_device() && !tensor_proto.device().empty());
|
|
|
|
|
at::Device device(tensor_proto.device());
|
|
|
|
|
if (device_.has_value()) {
|
|
|
|
|
// override the device, if user provides map_location
|
|
|
|
|
device = device_.value();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto storage_it = storageMap.find(record_key);
|
|
|
|
|
if (storage_it == storageMap.end()) {
|
|
|
|
|
auto [storage_ptr, record_size] = reader_->getRecord(record_key);
|
|
|
|
|
auto cpu_storage = at::Storage(
|
|
|
|
|
c10::Storage::use_byte_size_t(),
|
|
|
|
|
record_size,
|
|
|
|
|
std::move(storage_ptr),
|
|
|
|
|
/*allocator=*/nullptr,
|
|
|
|
|
/*resizable=*/false); // NB: we didn't set any allocator for the tensor
|
|
|
|
|
if (device.is_cpu()) {
|
|
|
|
|
storage_it =
|
|
|
|
|
storageMap.insert(std::make_pair(record_key, cpu_storage)).first;
|
|
|
|
|
} else if (device.is_cuda()) {
|
|
|
|
|
at::Tensor cpu_tensor =
|
|
|
|
|
at::empty({0}, at::CPU(type).options()).set_(cpu_storage);
|
|
|
|
|
at::Storage cuda_storage =
|
|
|
|
|
cpu_tensor.to(device, cpu_tensor.scalar_type()).storage();
|
|
|
|
|
storage_it =
|
|
|
|
|
storageMap.insert(std::make_pair(record_key, cuda_storage)).first;
|
|
|
|
|
} else {
|
|
|
|
|
AT_ERROR(
|
|
|
|
|
"supported devices include CPU and CUDA, however got ",
|
|
|
|
|
DeviceTypeName(device.type(), false));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (storage_it->second.device().type() != device.type() ||
|
|
|
|
|
(device.has_index() &&
|
|
|
|
|
storage_it->second.device().index() != device.index())) {
|
|
|
|
|
std::stringstream oss;
|
|
|
|
|
oss << "storage previously was specified with device "
|
|
|
|
|
<< storage_it->second.device() << "but now is specified with device "
|
|
|
|
|
<< device << std::endl;
|
|
|
|
|
AT_ERROR(oss.str());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
at::Tensor result;
|
|
|
|
|
|
|
|
|
|
if (device.is_cpu()) {
|
|
|
|
|
if (tensor_proto.is_quantized()) {
|
|
|
|
|
result =
|
|
|
|
|
at::_empty_affine_quantized(
|
|
|
|
|
{0}, type, tensor_proto.scale(), tensor_proto.zero_point())
|
|
|
|
|
.set_(storage_it->second, tensor_proto.offset(), dims, strides);
|
|
|
|
|
} else {
|
|
|
|
|
result =
|
|
|
|
|
at::empty({0}, at::CPU(type).options())
|
|
|
|
|
.set_(storage_it->second, tensor_proto.offset(), dims, strides);
|
|
|
|
|
}
|
|
|
|
|
} else if (device.is_cuda()) {
|
|
|
|
|
result =
|
|
|
|
|
at::empty(
|
|
|
|
|
{0}, c10::TensorOptions(type).device(storage_it->second.device()))
|
|
|
|
|
.set_(storage_it->second, tensor_proto.offset(), dims, strides);
|
|
|
|
|
}
|
|
|
|
|
AT_ASSERT(result.defined());
|
|
|
|
|
|
|
|
|
|
result = autograd::make_variable(result, tensor_proto.requires_grad());
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ScriptModuleDeserializer::LEGACY_moduleSetState(
|
|
|
|
|
const Module& module,
|
|
|
|
|
IValue state) {
|
|
|
|
|
auto setstate = module.find_method("__setstate__");
|
|
|
|
|
|
|
|
|
|
TORCH_CHECK(
|
|
|
|
|
setstate,
|
|
|
|
|
"Cannot call '__setstate__' method because"
|
|
|
|
|
" it does not exist");
|
|
|
|
|
|
|
|
|
|
// Since all Tensors are going to be None before `__setstate__` is run, we
|
|
|
|
|
// can't do any optimizations on them that depend on the module type since the
|
|
|
|
|
// values aren't consistent with their corresponding types.
|
|
|
|
|
GraphOptimizerEnabledGuard guard(false);
|
|
|
|
|
|
|
|
|
|
// TODO: once modules are first class in the interpreter and methods are not
|
|
|
|
|
// lowered, change this to `module->run_method("__setstate__", {state});`
|
|
|
|
|
if (setstate->num_inputs() == 1) {
|
|
|
|
|
setstate->run({module._ivalue()});
|
|
|
|
|
} else if (setstate->num_inputs() == 2) {
|
|
|
|
|
setstate->run({module._ivalue(), std::move(state)});
|
|
|
|
|
} else {
|
|
|
|
|
AT_ERROR("Unexpected schema on '__setstate__'");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Module ScriptModuleDeserializer::LEGACY_convertModule(
|
|
|
|
|
const torch::ModuleDef& module_def) {
|
|
|
|
|
// HACK: The current model exporter can create module_defs with invalid Python
|
|
|
|
|
// identifiers as names (they contain `.`)
|
|
|
|
|
const auto atoms = c10::QualifiedName(module_def.name()).atoms();
|
|
|
|
|
const size_t numPushed = atoms.size();
|
|
|
|
|
for (const auto& atom : atoms) {
|
|
|
|
|
auto is_digits = [](const std::string& str) {
|
|
|
|
|
return std::all_of(str.begin(), str.end(), ::isdigit);
|
|
|
|
|
};
|
|
|
|
|
auto sanitized = is_digits(atom) ? std::string("_") + atom : atom;
|
|
|
|
|
LEGACY_moduleStack_.emplace_back(sanitized);
|
|
|
|
|
}
|
|
|
|
|
auto module =
|
|
|
|
|
Module(c10::QualifiedName(LEGACY_moduleStack_), compilation_unit_);
|
|
|
|
|
for (const auto i : c10::irange(module_def.submodules_size())) {
|
|
|
|
|
const torch::ModuleDef& sub_def = module_def.submodules(i);
|
|
|
|
|
auto submodule = LEGACY_convertModule(sub_def);
|
|
|
|
|
module.register_module(sub_def.name(), submodule);
|
|
|
|
|
}
|
|
|
|
|
for (const auto i : c10::irange(module_def.parameters_size())) {
|
|
|
|
|
const torch::ParameterDef& param_def = module_def.parameters(i);
|
|
|
|
|
at::Tensor tensor = constant_table_.at(param_def.tensor_id()).toTensor();
|
|
|
|
|
if (param_def.is_buffer()) {
|
|
|
|
|
module.register_buffer(param_def.name(), tensor);
|
|
|
|
|
} else {
|
|
|
|
|
module.register_parameter(param_def.name(), tensor, /*is_buffer=*/false);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ScriptTypeParser typeParser(
|
|
|
|
|
std::make_shared<ClassResolver>(source_importer_));
|
|
|
|
|
for (const auto i : c10::irange(module_def.attributes_size())) {
|
|
|
|
|
const torch::AttributeDef& attr_def = module_def.attributes(i);
|
|
|
|
|
if (module.hasattr(attr_def.name())) {
|
|
|
|
|
// this attribute was already registered as a buffer above.
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
IValue ivalue;
|
|
|
|
|
if (attr_def.id() >= 0) {
|
|
|
|
|
// attribute has no value in the table, set it to None for now. After
|
|
|
|
|
// __getstate__, check that all the attributes that are not Optional
|
|
|
|
|
// can't be None
|
|
|
|
|
ivalue = LEGACY_pickled_ivalues_.at(attr_def.id());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
module.register_attribute(
|
|
|
|
|
attr_def.name(), typeParser.parseType(attr_def.type()), ivalue);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If present, load in the table of source ranges from the original
|
|
|
|
|
// generating code.
|
|
|
|
|
std::shared_ptr<SourceRangeUnpickler> gen_ranges = nullptr;
|
|
|
|
|
if (module_def.has_torchscript_debug_arena()) {
|
|
|
|
|
auto [data, size] =
|
|
|
|
|
reader_->getRecord(module_def.torchscript_debug_arena().key());
|
|
|
|
|
|
|
|
|
|
gen_ranges =
|
|
|
|
|
std::make_shared<ConcreteSourceRangeUnpickler>(std::move(data), size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (module_def.has_torchscript_arena()) {
|
|
|
|
|
auto [data, size] =
|
|
|
|
|
reader_->getRecord(module_def.torchscript_arena().key());
|
|
|
|
|
std::string data_str(static_cast<const char*>(data.get()), size);
|
|
|
|
|
auto src = std::make_shared<Source>(
|
|
|
|
|
std::string(static_cast<const char*>(data.get()), size),
|
|
|
|
|
module_def.torchscript_arena().key(),
|
|
|
|
|
1,
|
|
|
|
|
std::move(gen_ranges));
|
|
|
|
|
|
|
|
|
|
source_importer_.LEGACY_import_methods(module, src);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (module_def.has_get_state_attribute_id()) {
|
|
|
|
|
LEGACY_moduleSetState(
|
|
|
|
|
module,
|
|
|
|
|
LEGACY_pickled_ivalues_.at(module_def.get_state_attribute_id()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const ClassTypePtr& module_type = module._ivalue()->type();
|
|
|
|
|
for (size_t i = 0, N = module_type->numAttributes(); i < N; ++i) {
|
|
|
|
|
// Verify that all the non-optional attributes have been initialized
|
|
|
|
|
// TODO: Issue #20497
|
|
|
|
|
const IValue& v = module._ivalue()->getSlot(i);
|
|
|
|
|
if (module_type->getAttribute(i)->kind() != TypeKind::OptionalType) {
|
|
|
|
|
TORCH_CHECK(
|
|
|
|
|
!v.isNone(),
|
|
|
|
|
"The field '",
|
|
|
|
|
module_type->getAttributeName(i),
|
|
|
|
|
"' was left unitialized after __setstate__, but expected a ",
|
|
|
|
|
"value of type '",
|
|
|
|
|
v.type()->repr_str(),
|
|
|
|
|
"'");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (const auto i : c10::irange(numPushed)) {
|
|
|
|
|
(void)i; // Suppress unused variable warning
|
|
|
|
|
LEGACY_moduleStack_.pop_back();
|
|
|
|
|
}
|
|
|
|
|
return module;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
Module LEGACY_deserialize(
|
|
|
|
|
std::shared_ptr<CompilationUnit> cu,
|
|
|
|
|
std::shared_ptr<caffe2::serialize::PyTorchStreamReader> reader,
|
|
|
|
|
const std::optional<c10::Device>& device) {
|
|
|
|
|
ScriptModuleDeserializer deserializer(
|
|
|
|
|
std::move(cu), std::move(reader), device);
|
|
|
|
|
return deserializer.LEGACY_deserialize();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace torch::jit
|