mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: This adds dummy metadata for frozen builtin packages when using `torch::deploy`. This is a bit hacky but unblocks the Huggingface transformers library to be used within `torch::deploy`, which depends on `importlib.metadata.version` to detect whether torch is installed or not. https://github.com/huggingface/transformers/blob/main/src/transformers/utils/import_utils.py#L49 Pull Request resolved: https://github.com/pytorch/pytorch/pull/76211 Test Plan: Added `importlib.metadata.version("torch")` unit test Reviewed By: kiukchung, PaliC Differential Revision: D35834831 Pulled By: d4l3k fbshipit-source-id: e58365e1ada69299adea96f0ca1fe211e092dd97 (cherry picked from commit c4b4152a24dcdf359503db2112a10a88633e67b6)
285 lines
9.0 KiB
C++
285 lines
9.0 KiB
C++
#include <Python.h>
|
|
#include <c10/util/Exception.h>
|
|
#include <fmt/format.h>
|
|
#include <torch/csrc/deploy/Exception.h>
|
|
#include <torch/csrc/deploy/interpreter/builtin_registry.h>
|
|
|
|
namespace torch {
|
|
namespace deploy {
|
|
|
|
// These numbers of modules should not change as long as the cpython version
// embedded in the build remains fixed
static const size_t NUM_FROZEN_PY_BUILTIN_MODULES = 6;
#ifndef FBCODE_CAFFE2
// Only meaningful for OSS builds, where the frozen python stdlib is linked in
// directly (see the comment in sanityCheck() below).
static const size_t NUM_FROZEN_PY_STDLIB_MODULES = 680;
#endif

// Init function for the `torch._C` builtin extension module.
extern "C" PyObject* initModule(void);

// Register CPython's own frozen modules (importlib bootstrap machinery).
REGISTER_TORCH_DEPLOY_BUILTIN(cpython_internal, PyImport_FrozenModules);

#ifdef FBCODE_CAFFE2
// fbcode: frozen torch python modules come from a separate registration, so
// only the `torch._C` builtin is registered here (no frozen table).
REGISTER_TORCH_DEPLOY_BUILTIN(frozentorch, nullptr, "torch._C", initModule);
#else
// OSS: the frozen stdlib+torch table is linked into this binary.
extern "C" struct _frozen _PyImport_FrozenModules_torch[];
REGISTER_TORCH_DEPLOY_BUILTIN(
    frozentorch,
    _PyImport_FrozenModules_torch,
    "torch._C",
    initModule);
#endif
|
|
|
|
// Constructs a registry item for one builtin library, counting the entries
// in its (optional) frozen-module table and logging the result to stderr.
BuiltinRegistryItem::BuiltinRegistryItem(
    const char* _name,
    const struct _frozen* _frozenModules,
    std::vector<std::pair<const char*, void*>>&& _builtinModules)
    : name(_name),
      frozenModules(_frozenModules),
      builtinModules(std::move(_builtinModules)) {
  // The frozen table, when present, is terminated by an entry whose `name`
  // is nullptr; count everything before that sentinel.
  unsigned count = 0;
  if (frozenModules != nullptr) {
    for (const struct _frozen* cur = frozenModules; cur->name != nullptr;
         ++cur) {
      ++count;
    }
  }
  numModules = count;

  fprintf(
      stderr,
      "torch::deploy builtin %s contains %u modules\n",
      name,
      numModules);
}
|
|
|
|
// Returns the process-wide singleton registry, constructed lazily and
// thread-safely on first use (C++11 magic static).
BuiltinRegistry* BuiltinRegistry::get() {
  static BuiltinRegistry registry;
  return &registry;
}
|
|
|
|
// Pre-Py_Initialize() setup: validates the registry, installs the combined
// frozen-module table, and registers every builtin's init function with
// CPython's inittab. Must run before the interpreter is initialized.
void BuiltinRegistry::runPreInitialization() {
  TORCH_INTERNAL_ASSERT(!Py_IsInitialized());
  sanityCheck();
  // Replace CPython's frozen-module table with the merged table containing
  // every registered item's frozen modules.
  PyImport_FrozenModules = BuiltinRegistry::getAllFrozenModules();
  TORCH_INTERNAL_ASSERT(PyImport_FrozenModules != nullptr);

  appendCPythonInittab();
}
|
|
|
|
const char* metaPathSetupTemplate = R"PYTHON(
|
|
import sys
|
|
from importlib.metadata import DistributionFinder, Distribution
|
|
# We need to register a custom meta path finder because we are registering
|
|
# `torch._C` as a builtin module.
|
|
#
|
|
# Normally, builtins will be found by the `BuiltinImporter` meta path finder.
|
|
# However, `BuiltinImporter` is hard-coded to assume that all builtin modules
|
|
# are top-level imports. Since `torch._C` is a submodule of `torch`, the
|
|
# BuiltinImporter skips it.
|
|
class F:
|
|
MODULES = {<<<DEPLOY_BUILTIN_MODULES_CSV>>>}
|
|
|
|
def find_spec(self, fullname, path, target=None):
|
|
if fullname in self.MODULES:
|
|
# Load this module using `BuiltinImporter`, but set `path` to None
|
|
# in order to trick it into loading our module.
|
|
return sys.meta_path[1].find_spec(fullname, path=None, target=None)
|
|
return None
|
|
|
|
def find_distributions(self, context=DistributionFinder.Context()):
|
|
modules = {"torch"} | self.MODULES
|
|
# Insert dummy distribution records for each builtin module so
|
|
# importlib.metadata.version(...) works.
|
|
if context.name is None:
|
|
for name in modules:
|
|
yield DummyDistribution(name)
|
|
if context.name in modules:
|
|
yield DummyDistribution(context.name)
|
|
|
|
class DummyDistribution(Distribution):
|
|
def __init__(self, name):
|
|
self._metadata = {
|
|
"Name": name,
|
|
"Version": "0.0.1+fake_multipy",
|
|
}
|
|
|
|
@property
|
|
def metadata(self):
|
|
return self._metadata
|
|
|
|
sys.meta_path.insert(0, F())
|
|
)PYTHON";
|
|
|
|
void BuiltinRegistry::runPostInitialization() {
|
|
TORCH_INTERNAL_ASSERT(Py_IsInitialized());
|
|
std::string metaPathSetupScript(metaPathSetupTemplate);
|
|
std::string replaceKey = "<<<DEPLOY_BUILTIN_MODULES_CSV>>>";
|
|
size_t pos = metaPathSetupScript.find(replaceKey);
|
|
if (pos != std::string::npos) {
|
|
metaPathSetupScript.replace(pos, replaceKey.size(), getBuiltinModulesCSV());
|
|
}
|
|
int r = PyRun_SimpleString(metaPathSetupScript.c_str());
|
|
TORCH_INTERNAL_ASSERT(r == 0);
|
|
}
|
|
|
|
void BuiltinRegistry::registerBuiltin(
|
|
std::unique_ptr<BuiltinRegistryItem> item) {
|
|
if (get()->name2idx_.find(item->name) != get()->name2idx_.end()) {
|
|
throw std::runtime_error(std::string("redefine bultin: ") + item->name);
|
|
}
|
|
get()->name2idx_[item->name] = get()->items_.size();
|
|
get()->items_.emplace_back(std::move(item));
|
|
}
|
|
|
|
// Looks up a registered item by library name; returns nullptr when absent.
BuiltinRegistryItem* BuiltinRegistry::getItem(const std::string& name) {
  const auto& name2idx = get()->name2idx_;
  auto found = name2idx.find(name);
  if (found == name2idx.end()) {
    return nullptr;
  }
  return get()->items_[found->second].get();
}
|
|
|
|
unsigned BuiltinRegistry::totalNumModules() {
|
|
unsigned tot = 0;
|
|
for (const auto& itemptr : get()->items_) {
|
|
tot += itemptr->numModules;
|
|
}
|
|
return tot;
|
|
}
|
|
|
|
// Builds one null-terminated frozen-module table that concatenates every
// registered item's table. The buffer is allocated with PyMem_Malloc and is
// handed to CPython (PyImport_FrozenModules); it is intentionally never
// freed for the lifetime of the process. Returns nullptr when there are no
// modules or when the size computation would overflow / allocation fails.
struct _frozen* BuiltinRegistry::getAllFrozenModules() {
  /* Allocate new memory for the combined table */
  size_t totNumModules = totalNumModules();
  struct _frozen* p = nullptr;
  // Guard: ensure (totNumModules + 1) * sizeof(struct _frozen) cannot
  // overflow size_t before computing it.
  if (totNumModules > 0 &&
      totNumModules <= SIZE_MAX / sizeof(struct _frozen) - 1) {
    size_t size = sizeof(struct _frozen) * (totNumModules + 1);
    p = (_frozen*)PyMem_Malloc(size);
  }
  if (p == nullptr) {
    return nullptr;
  }

  // mark p as an empty frozen module list
  memset(&p[0], 0, sizeof(p[0]));

  /* Copy the tables into the new memory */
  unsigned off = 0;
  for (const auto& itemptr : items()) {
    if (itemptr->numModules > 0) {
      // Copy the item's entries plus its null terminator. `off` advances by
      // numModules only, so the next iteration overwrites this terminator;
      // the last item's terminator ends the combined table.
      memcpy(
          p + off,
          itemptr->frozenModules,
          (itemptr->numModules + 1) * sizeof(struct _frozen));
      off += itemptr->numModules;
    }
  }

  return p;
}
|
|
|
|
// Validates that the expected builtin registrations are present before the
// frozen-module table is installed; aborts via TORCH_INTERNAL_ASSERT when
// something is missing.
void BuiltinRegistry::sanityCheck() {
  auto* cpythonInternalFrozens = getItem("cpython_internal");
  // Num frozen builtins shouldn't change (unless modifying the underlying
  // cpython version)
  TORCH_INTERNAL_ASSERT(
      cpythonInternalFrozens != nullptr &&
          cpythonInternalFrozens->numModules == NUM_FROZEN_PY_BUILTIN_MODULES,
      "Missing python builtin frozen modules");

  auto* frozenpython = getItem("frozenpython");
#ifdef FBCODE_CAFFE2
  TORCH_INTERNAL_ASSERT(
      frozenpython != nullptr, "Missing frozen python modules");
#else
  auto* frozentorch = getItem("frozentorch");
  // Check frozenpython+frozentorch together since in OSS frozenpython is empty
  // and frozentorch contains stdlib+torch, while in fbcode they are separated
  // due to thirdparty2 frozenpython. No fixed number of torch modules to check
  // for, but there should be at least one.
  TORCH_INTERNAL_ASSERT(
      frozenpython != nullptr && frozentorch != nullptr &&
          frozenpython->numModules + frozentorch->numModules >
              NUM_FROZEN_PY_STDLIB_MODULES + 1,
      "Missing frozen python stdlib or torch modules");
#endif
}
|
|
|
|
std::vector<std::pair<const char*, void*>> BuiltinRegistry::
|
|
getAllBuiltinModules() {
|
|
std::vector<std::pair<const char*, void*>> allBuiltinModules;
|
|
for (const auto& itemptr : items()) {
|
|
allBuiltinModules.insert(
|
|
allBuiltinModules.end(),
|
|
itemptr->builtinModules.begin(),
|
|
itemptr->builtinModules.end());
|
|
}
|
|
return allBuiltinModules;
|
|
}
|
|
|
|
// Registers each builtin module's init function with CPython's inittab so
// that importing the module resolves to the compiled-in implementation.
// Called from runPreInitialization(), i.e. before Py_Initialize().
void BuiltinRegistry::appendCPythonInittab() {
  for (const auto& pair : get()->getAllBuiltinModules()) {
    // Init functions are stored type-erased as void*; cast back to the
    // PyInit_* signature that PyImport_AppendInittab expects.
    PyImport_AppendInittab(
        pair.first, reinterpret_cast<PyObject* (*)()>(pair.second));
  }
}
|
|
|
|
std::string BuiltinRegistry::getBuiltinModulesCSV() {
|
|
std::string modulesCSV;
|
|
for (const auto& pair : get()->getAllBuiltinModules()) {
|
|
if (!modulesCSV.empty()) {
|
|
modulesCSV += ", ";
|
|
}
|
|
modulesCSV += fmt::format("'{}'", pair.first);
|
|
}
|
|
return modulesCSV;
|
|
}
|
|
|
|
// Static-registration helper: collects the trailing (moduleName, initFn)
// varargs — terminated by a nullptr module name — and registers the library
// with BuiltinRegistry. Libraries rejected by the (optional) allowLibrary
// hook are skipped entirely.
BuiltinRegisterer::BuiltinRegisterer(
    const char* name,
    const struct _frozen* frozenModules...) {
  if (allowLibrary && !allowLibrary(name)) {
    fprintf(
        stderr,
        "Skip %s since it's rejected by the allowLibrary method\n",
        name);
    return;
  }
  // gather builtin modules for this lib
  va_list args;
  va_start(args, frozenModules);
  const char* moduleName = nullptr;
  void* initFn = nullptr;
  std::vector<std::pair<const char*, void*>> builtinModules;
  while (true) {
    moduleName = va_arg(args, const char*);
    // encounter end of sequence
    if (moduleName == nullptr) {
      break;
    }
    initFn = va_arg(args, void*);
    // skip null init function. This can happen if we create weak reference
    // to init functions defined in another library. Depending on if we
    // link with that library, the init function pointer will be the real
    // implementation or nullptr. tensorrt is a good example. If this is
    // a CPU build, we will not link with the tensorrt library, so the init
    // function will be nullptr; on the other hand if this is a GPU build,
    // we link with the tensorrt library, so the init function will not be
    // nullptr.
    if (initFn == nullptr) {
      continue;
    }
    builtinModules.emplace_back(moduleName, initFn);
  }
  // BUGFIX: every va_start must be matched by va_end (the original leaked
  // the va_list, which is undefined behavior per the C standard).
  va_end(args);

  // note: don't call glog api in this method since this method is usually
  // called before glog get setup
  // BUGFIX: use %zu for size_t arguments; %lu mismatches on LLP64 platforms.
  fprintf(
      stderr,
      "Registering torch::deploy builtin library %s (idx %zu) with %zu builtin modules\n",
      name,
      BuiltinRegistry::items().size(),
      builtinModules.size());
  BuiltinRegistry::registerBuiltin(std::make_unique<BuiltinRegistryItem>(
      name, frozenModules, std::move(builtinModules)));
}
|
|
|
|
} // namespace deploy
|
|
} // namespace torch
|