pytorch/torch/csrc/deploy/interpreter/builtin_registry.cpp
Tristan Rice bbc6fcd730 deploy: add dummy metadata for builtin packages (#76211)
Summary:
This adds dummy metadata for frozen builtin packages when using `torch::deploy`. This is a bit hacky, but it unblocks the Huggingface transformers library, which depends on `importlib.metadata.version` to detect whether torch is installed, so it can now be used within `torch::deploy`.

https://github.com/huggingface/transformers/blob/main/src/transformers/utils/import_utils.py#L49

Pull Request resolved: https://github.com/pytorch/pytorch/pull/76211

Test Plan: Added `importlib.metadata.version("torch")` unit test
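
The detection transformers performs boils down to a standard-library lookup; a minimal sketch of the behavior the test exercises (not the exact unit test from the PR):

    from importlib.metadata import version, PackageNotFoundError

    try:
        # With the dummy distributions registered below, this returns the
        # placeholder version instead of raising PackageNotFoundError.
        print(version("torch"))  # "0.0.1+fake_multipy" inside torch::deploy
    except PackageNotFoundError:
        print("torch is not installed")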

Reviewed By: kiukchung, PaliC

Differential Revision: D35834831

Pulled By: d4l3k

fbshipit-source-id: e58365e1ada69299adea96f0ca1fe211e092dd97
(cherry picked from commit c4b4152a24dcdf359503db2112a10a88633e67b6)
2022-04-26 01:00:02 +00:00


#include <Python.h>
#include <c10/util/Exception.h>
#include <fmt/format.h>
#include <torch/csrc/deploy/Exception.h>
#include <torch/csrc/deploy/interpreter/builtin_registry.h>
namespace torch {
namespace deploy {
// These module counts should not change as long as the CPython version
// embedded in the build remains fixed
static const size_t NUM_FROZEN_PY_BUILTIN_MODULES = 6;
#ifndef FBCODE_CAFFE2
static const size_t NUM_FROZEN_PY_STDLIB_MODULES = 680;
#endif
extern "C" PyObject* initModule(void);
REGISTER_TORCH_DEPLOY_BUILTIN(cpython_internal, PyImport_FrozenModules);
#ifdef FBCODE_CAFFE2
REGISTER_TORCH_DEPLOY_BUILTIN(frozentorch, nullptr, "torch._C", initModule);
#else
extern "C" struct _frozen _PyImport_FrozenModules_torch[];
REGISTER_TORCH_DEPLOY_BUILTIN(
frozentorch,
_PyImport_FrozenModules_torch,
"torch._C",
initModule);
#endif
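// Example (hypothetical names): a library that ships a frozen module table
// plus two builtin extension modules would register as
//   REGISTER_TORCH_DEPLOY_BUILTIN(
//       mylib, mylib_frozen_modules, "mylib._C", initMyLibC, "mylib._ops", initMyLibOps);
// The trailing (name, initFn) pairs are consumed by BuiltinRegisterer below.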
BuiltinRegistryItem::BuiltinRegistryItem(
const char* _name,
const struct _frozen* _frozenModules,
std::vector<std::pair<const char*, void*>>&& _builtinModules)
: name(_name),
frozenModules(_frozenModules),
builtinModules(std::move(_builtinModules)) {
numModules = 0;
if (frozenModules) {
while (frozenModules[numModules].name != nullptr) {
++numModules;
}
}
fprintf(
stderr,
"torch::deploy builtin %s contains %u modules\n",
name,
numModules);
}
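// Singleton accessor; the registry is constructed on first use.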
BuiltinRegistry* BuiltinRegistry::get() {
static BuiltinRegistry _registry;
return &_registry;
}
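// Runs before Py_Initialize(): validates the registry, installs the merged
// frozen module table, and appends all builtin modules to CPython's inittab.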
void BuiltinRegistry::runPreInitialization() {
TORCH_INTERNAL_ASSERT(!Py_IsInitialized());
sanityCheck();
PyImport_FrozenModules = BuiltinRegistry::getAllFrozenModules();
TORCH_INTERNAL_ASSERT(PyImport_FrozenModules != nullptr);
appendCPythonInittab();
}
const char* metaPathSetupTemplate = R"PYTHON(
import sys
from importlib.metadata import DistributionFinder, Distribution
# We need to register a custom meta path finder because we are registering
# `torch._C` as a builtin module.
#
# Normally, builtins will be found by the `BuiltinImporter` meta path finder.
# However, `BuiltinImporter` is hard-coded to assume that all builtin modules
# are top-level imports. Since `torch._C` is a submodule of `torch`, the
# BuiltinImporter skips it.
class F:
MODULES = {<<<DEPLOY_BUILTIN_MODULES_CSV>>>}
def find_spec(self, fullname, path, target=None):
if fullname in self.MODULES:
# Load this module using `BuiltinImporter`, but set `path` to None
# in order to trick it into loading our module.
return sys.meta_path[1].find_spec(fullname, path=None, target=None)
return None
def find_distributions(self, context=DistributionFinder.Context()):
modules = {"torch"} | self.MODULES
# Insert dummy distribution records for each builtin module so
# importlib.metadata.version(...) works.
if context.name is None:
for name in modules:
yield DummyDistribution(name)
if context.name in modules:
yield DummyDistribution(context.name)
class DummyDistribution(Distribution):
def __init__(self, name):
self._metadata = {
"Name": name,
"Version": "0.0.1+fake_multipy",
}
@property
def metadata(self):
return self._metadata
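# Install the finder ahead of BuiltinImporter so submodule builtins and the
# dummy distributions above are found first.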
sys.meta_path.insert(0, F())
)PYTHON";
void BuiltinRegistry::runPostInitialization() {
TORCH_INTERNAL_ASSERT(Py_IsInitialized());
std::string metaPathSetupScript(metaPathSetupTemplate);
std::string replaceKey = "<<<DEPLOY_BUILTIN_MODULES_CSV>>>";
size_t pos = metaPathSetupScript.find(replaceKey);
if (pos != std::string::npos) {
metaPathSetupScript.replace(pos, replaceKey.size(), getBuiltinModulesCSV());
}
int r = PyRun_SimpleString(metaPathSetupScript.c_str());
TORCH_INTERNAL_ASSERT(r == 0);
}
void BuiltinRegistry::registerBuiltin(
std::unique_ptr<BuiltinRegistryItem> item) {
if (get()->name2idx_.find(item->name) != get()->name2idx_.end()) {
    throw std::runtime_error(std::string("redefine builtin: ") + item->name);
}
get()->name2idx_[item->name] = get()->items_.size();
get()->items_.emplace_back(std::move(item));
}
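// Look up a registered library by name; returns nullptr if it is absent.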
BuiltinRegistryItem* BuiltinRegistry::getItem(const std::string& name) {
auto itr = get()->name2idx_.find(name);
return itr == get()->name2idx_.end() ? nullptr
: get()->items_[itr->second].get();
}
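// Total count of frozen modules across all registered libraries.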
unsigned BuiltinRegistry::totalNumModules() {
unsigned tot = 0;
for (const auto& itemptr : get()->items_) {
tot += itemptr->numModules;
}
return tot;
}
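// Merge every registered frozen module table into one sentinel-terminated
// table allocated with PyMem_Malloc; returns nullptr when the registry is
// empty, the size computation would overflow, or allocation fails.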
struct _frozen* BuiltinRegistry::getAllFrozenModules() {
/* Allocate new memory for the combined table */
size_t totNumModules = totalNumModules();
struct _frozen* p = nullptr;
if (totNumModules > 0 &&
totNumModules <= SIZE_MAX / sizeof(struct _frozen) - 1) {
size_t size = sizeof(struct _frozen) * (totNumModules + 1);
p = (_frozen*)PyMem_Malloc(size);
}
if (p == nullptr) {
return nullptr;
}
  // zero the first entry so the table reads as empty until it is populated
memset(&p[0], 0, sizeof(p[0]));
/* Copy the tables into the new memory */
unsigned off = 0;
for (const auto& itemptr : items()) {
if (itemptr->numModules > 0) {
memcpy(
p + off,
itemptr->frozenModules,
(itemptr->numModules + 1) * sizeof(struct _frozen));
off += itemptr->numModules;
}
}
return p;
}
void BuiltinRegistry::sanityCheck() {
auto* cpythonInternalFrozens = getItem("cpython_internal");
// Num frozen builtins shouldn't change (unless modifying the underlying
// cpython version)
TORCH_INTERNAL_ASSERT(
cpythonInternalFrozens != nullptr &&
cpythonInternalFrozens->numModules == NUM_FROZEN_PY_BUILTIN_MODULES,
"Missing python builtin frozen modules");
auto* frozenpython = getItem("frozenpython");
#ifdef FBCODE_CAFFE2
TORCH_INTERNAL_ASSERT(
frozenpython != nullptr, "Missing frozen python modules");
#else
auto* frozentorch = getItem("frozentorch");
// Check frozenpython+frozentorch together since in OSS frozenpython is empty
// and frozentorch contains stdlib+torch, while in fbcode they are separated
// due to thirdparty2 frozenpython. No fixed number of torch modules to check
// for, but there should be at least one.
TORCH_INTERNAL_ASSERT(
frozenpython != nullptr && frozentorch != nullptr &&
frozenpython->numModules + frozentorch->numModules >
NUM_FROZEN_PY_STDLIB_MODULES + 1,
"Missing frozen python stdlib or torch modules");
#endif
}
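// Flatten the (module name, init function) pairs from every registered
// library into a single list.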
std::vector<std::pair<const char*, void*>> BuiltinRegistry::
getAllBuiltinModules() {
std::vector<std::pair<const char*, void*>> allBuiltinModules;
for (const auto& itemptr : items()) {
allBuiltinModules.insert(
allBuiltinModules.end(),
itemptr->builtinModules.begin(),
itemptr->builtinModules.end());
}
return allBuiltinModules;
}
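// Register each builtin module's init function with CPython's inittab; this
// must happen before Py_Initialize().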
void BuiltinRegistry::appendCPythonInittab() {
for (const auto& pair : get()->getAllBuiltinModules()) {
PyImport_AppendInittab(
pair.first, reinterpret_cast<PyObject* (*)()>(pair.second));
}
}
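// Render the builtin module names as a quoted, comma-separated list for
// substitution into the meta path setup script.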
std::string BuiltinRegistry::getBuiltinModulesCSV() {
std::string modulesCSV;
for (const auto& pair : get()->getAllBuiltinModules()) {
if (!modulesCSV.empty()) {
modulesCSV += ", ";
}
modulesCSV += fmt::format("'{}'", pair.first);
}
return modulesCSV;
}
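// Gathers the variadic (name, initFn) pairs, which must be terminated by a
// nullptr name, skips libraries rejected by allowLibrary, and registers the
// rest.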
BuiltinRegisterer::BuiltinRegisterer(
const char* name,
const struct _frozen* frozenModules...) {
if (allowLibrary && !allowLibrary(name)) {
fprintf(
stderr,
"Skip %s since it's rejected by the allowLibrary method\n",
name);
return;
}
// gather builtin modules for this lib
va_list args;
va_start(args, frozenModules);
const char* moduleName = nullptr;
void* initFn = nullptr;
std::vector<std::pair<const char*, void*>> builtinModules;
while (true) {
moduleName = va_arg(args, const char*);
    // a nullptr name marks the end of the sequence
if (moduleName == nullptr) {
break;
}
initFn = va_arg(args, void*);
    // Skip null init functions. This can happen when we hold a weak
    // reference to an init function defined in another library: depending
    // on whether we link against that library, the pointer is either the
    // real implementation or nullptr. tensorrt is a good example: a CPU
    // build does not link against the tensorrt library, so its init
    // function is nullptr, while a GPU build does link against it, so the
    // init function is non-null.
if (initFn == nullptr) {
continue;
}
builtinModules.emplace_back(moduleName, initFn);
}
  // Note: don't call the glog API in this method since it usually runs
  // before glog has been set up.
fprintf(
stderr,
"Registering torch::deploy builtin library %s (idx %lu) with %lu builtin modules\n",
name,
BuiltinRegistry::items().size(),
builtinModules.size());
BuiltinRegistry::registerBuiltin(std::make_unique<BuiltinRegistryItem>(
name, frozenModules, std::move(builtinModules)));
}
} // namespace deploy
} // namespace torch