Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/50458

libinterpreter.so contains a frozen Python distribution including the torch-python bindings. Freezing refers to serializing the bytecode of the Python standard library modules, as well as the torch Python library, and embedding them in the library code. The library can then be dlopened multiple times in one process, with each interpreter having its own Python state and GIL. In addition, each Python environment is sealed off from the filesystem and can only import the frozen modules included in the distribution.

This change relies on the newly added frozenpython, a CPython 3.8.6 fork built for this purpose. Frozenpython provides libpython3.8-frozen.a, which contains frozen bytecode and object code for the Python standard library. Building on top of frozen python, this diff adds the frozen torch-python bindings, providing each embedded interpreter with its own copy of the torch bindings. Each interpreter is intended to share one instance of libtorch and the underlying tensor libraries.

Known issues:
- Autograd is not expected to work with the embedded interpreter currently, as it manages its own Python interactions and needs to coordinate with the duplicated Python states in each of the interpreters.
- Distributed and CUDA support is disabled in the libinterpreter.so build and needs to be revisited.
- __file__ is not supported in the context of embedded Python, since there are no files for the underlying library modules; code that relies on __file__ will not work.
- __version__ is not properly supported in the embedded torch-python; there is just a workaround for now.

Test Plan: tested locally and on CI with cmake and buck builds running the torch::deploy interpreter_test.

Reviewed By: ailzhang

Differential Revision: D25850783

fbshipit-source-id: a4656377caff25b73913daae7ae2f88bcab8fd88
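For orientation, a minimal host-side sketch of the loading flow described above follows. It is not the actual torch::deploy loader: it assumes each interpreter gets its own on-disk copy of libinterpreter.so (dlopen of an already-loaded path returns the existing handle), and the InterpreterImpl member names used here (startup, run_some_python, teardown) are assumptions inferred from the initialize_interface definition in this file.

#include <dlfcn.h>
#include <stdexcept>

#include <torch/csrc/deploy/interpreter/interpreter_impl.h>

// Hypothetical sketch only: load one isolated interpreter from its own copy
// of libinterpreter.so and run a line of Python in it.
void run_in_fresh_interpreter(const char* so_copy_path, const char* py_code) {
  void* handle = dlopen(so_copy_path, RTLD_LOCAL | RTLD_LAZY);
  if (handle == nullptr) {
    throw std::runtime_error(dlerror());
  }
  // initialize_interface is the symbol exported with default visibility; it
  // fills the struct with this library copy's function pointers.
  auto init = reinterpret_cast<void (*)(InterpreterImpl*)>(
      dlsym(handle, "initialize_interface"));
  if (init == nullptr) {
    throw std::runtime_error(dlerror());
  }
  InterpreterImpl interp;
  init(&interp);
  interp.startup();                 // private CPython state and GIL for this copy
  interp.run_some_python(py_code);  // e.g. "import torch; print(torch.rand(2))"
  interp.teardown();
  dlclose(handle);
}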
325 lines
9.7 KiB
C++
#include <dlfcn.h>

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <iostream>
#include <torch/csrc/deploy/interpreter/interpreter_impl.h>
#include <pybind11/embed.h>
#include <cstdio>
#include <ATen/ATen.h>
#include <torch/csrc/jit/python/pybind_utils.h>
#include <map>
#include <thread>
#include <fmt/format.h>

namespace py = pybind11;
using namespace py::literals;

// TODO this should come from cmake
#define DEBUG 0
template <typename T>
const auto PYOBJ_ASSERT(T obj) {
#if (DEBUG == 1)
  if (NULL == obj) {
    PyErr_Print();
  }
#endif
  TORCH_INTERNAL_ASSERT(NULL != obj);
}

static wchar_t* program;

#define FOREACH_LIBRARY(_) \
  _(array) \
  _(_asyncio) \
  _(audioop) \
  _(binascii) \
  _(_bisect) \
  _(_blake2) \
  _(_bz2) \
  _(cmath) \
  _(_codecs_cn) \
  _(_codecs_hk) \
  _(_codecs_iso2022) \
  _(_codecs_jp) \
  _(_codecs_kr) \
  _(_codecs_tw) \
  _(_contextvars) \
  _(_crypt) \
  _(_csv) \
  _(_ctypes) \
  _(_ctypes_test) \
  _(_curses) \
  _(_curses_panel) \
  _(_datetime) \
  _(_decimal) \
  _(_elementtree) \
  _(fcntl) \
  _(grp) \
  _(_hashlib) \
  _(_heapq) \
  _(_json) \
  _(_lsprof) \
  _(_lzma) \
  _(math) \
  _(_md5) \
  _(mmap) \
  _(_multibytecodec) \
  _(_multiprocessing) \
  _(nis) \
  _(_opcode) \
  _(ossaudiodev) \
  _(parser) \
  _(_pickle) \
  _(_posixsubprocess) \
  _(pyexpat) \
  _(_queue) \
  _(_random) \
  _(readline) \
  _(resource) \
  _(select) \
  _(_sha1) \
  _(_sha256) \
  _(_sha3) \
  _(_sha512) \
  _(_socket) \
  _(spwd) \
  _(_ssl) \
  _(_struct) \
  _(syslog) \
  _(termios) \
  _(_testbuffer) \
  _(_testcapi) \
  _(_testimportmultiple) \
  _(_testmultiphase) \
  _(unicodedata) \
  _(xxlimited) \
  _(_xxtestfuzz) \
  _(zlib)

#define DECLARE_LIBRARY_INIT(name) extern "C" PyObject* PyInit_##name(void);
FOREACH_LIBRARY(DECLARE_LIBRARY_INIT)
#undef DECLARE_LIBRARY_INIT

extern "C" __attribute__((visibility("default"))) void initialize_interface(
    InterpreterImpl* s) {
#define INITIALIZE_MEMBER(func) s->func = func;
  FOREACH_INTERFACE_FUNCTION(INITIALIZE_MEMBER)
#undef INITIALIZE_MEMBER
}

// These numbers of modules should not change as long as the cpython version
// embedded in the build remains fixed
static const size_t NUM_FROZEN_PY_BUILTIN_MODULES = 6;
static const size_t NUM_FROZEN_PY_STDLIB_MODULES = 680;

// We need to preserve the existing FrozenModules list, since it includes
// important importlib machinery. This code is adapted from the similar
// `PyImport_ExtendInittab`.
int extendFrozenModules(struct _frozen* frozenpython, struct _frozen* frozentorch) {
  struct _frozen* p = nullptr;
  size_t a = 0, b = 0, c = 0;
  int res = 0;

  /* Count the number of entries in both tables */
  for (a = 0; frozenpython[a].name != nullptr; a++) {
    // std::cout << "frozenpython[" << a << "]: " << frozenpython[a].name << std::endl;
  }
  for (b = 0; frozentorch[b].name != nullptr; b++) {
    // std::cout << "frozentorch[" << b << "]: " << frozentorch[b].name << std::endl;
  }
  for (c = 0; PyImport_FrozenModules[c].name != nullptr; c++) {
    // std::cout << "oldfrozen[" << c << "]: " << PyImport_FrozenModules[c].name << std::endl;
  }

  // Num frozen builtins shouldn't change (unless modifying the underlying cpython version)
  TORCH_INTERNAL_ASSERT(c == NUM_FROZEN_PY_BUILTIN_MODULES, "Missing python builtin frozen modules");
  // Check a+b together since in OSS a is empty and b contains stdlib+torch, while
  // in fbcode they are separated due to thirdparty2 frozenpython.
  // No fixed number of torch modules to check for, but there should be at least one.
  TORCH_INTERNAL_ASSERT(a + b > NUM_FROZEN_PY_STDLIB_MODULES + 1, "Missing frozen python stdlib or torch modules");

  /* Allocate new memory for the combined table */
  if (a + b + c <= SIZE_MAX / sizeof(struct _frozen) - 1) {
    size_t size = sizeof(struct _frozen) * (a + b + c + 1);
    p = (_frozen*)PyMem_Realloc(p, size);
  }
  if (p == nullptr) {
    return -1;
  }

  /* Copy the tables into the new memory */
  memcpy(p, PyImport_FrozenModules, (c + 1) * sizeof(struct _frozen));
  memcpy(p + c, frozenpython, (a + 1) * sizeof(struct _frozen));
  memcpy(p + a + c, frozentorch, (b + 1) * sizeof(struct _frozen));
  PyImport_FrozenModules = p;
  return res;
}

// We need to register a custom finder because we are registering `torch._C` as
// a built-in module, and it will otherwise get skipped by the default importer.
const char* finder = R"RAW(
import sys
# Remove the path-based importer, as we don't want our isolated interpreter to read the file system
sys.meta_path = sys.meta_path[:-1]

class F:
    def find_spec(self, fullname, path, target=None):
        if fullname == 'torch._C':
            return sys.meta_path[1].find_spec('torch._C', None, None)
        return None
sys.meta_path.insert(0, F())

# make loader importable
)RAW";

const char* sysprint = R"RAW(
import sys
print("base_exec_prefix:", sys.base_exec_prefix)
print("_base_executable:", sys._base_executable)
print("base_prefix:", sys.base_prefix)
print("exec_prefix:", sys.exec_prefix)
print("executable:", sys.executable)
print("path:", sys.path)
print("prefix:", sys.prefix)
)RAW";

extern "C" PyObject* initModule(void);
extern "C" struct _frozen _PyImport_FrozenModules[];
extern "C" struct _frozen _PyImport_FrozenModules_torch[];

static std::atomic<size_t> s_id;
std::map<size_t, py::object> forwards;

__attribute__((constructor)) void init() {}
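// startup() brings up this interpreter's private CPython: it registers the
// statically linked extension modules and torch._C with the inittab, splices
// the frozen stdlib/torch tables into PyImport_FrozenModules, initializes an
// isolated config with an empty module search path, installs the custom
// torch._C finder, and finally releases the GIL it acquired.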
void startup() {
#define APPEND_INIT(name) PyImport_AppendInittab(#name, PyInit_##name);
  FOREACH_LIBRARY(APPEND_INIT)
#undef APPEND_INIT
  PyImport_AppendInittab("torch._C", initModule);

  int ret = extendFrozenModules(_PyImport_FrozenModules, _PyImport_FrozenModules_torch);
  TORCH_INTERNAL_ASSERT(ret == 0);

  PyPreConfig preconfig;
  PyPreConfig_InitIsolatedConfig(&preconfig);
  PyStatus status = Py_PreInitialize(&preconfig);
  TORCH_INTERNAL_ASSERT(!PyStatus_Exception(status))

  PyConfig config;
  PyConfig_InitIsolatedConfig(&config);

  // Completely blank out the path configuration. This ensures we have complete
  // control of how our embedded Python searches for modules, and we will never
  // consult the external filesystem. See:
  // https://docs.python.org/3/c-api/init_config.html#path-configuration
  config.site_import = 0;

  status = PyConfig_SetString(&config, &config.base_exec_prefix, L"");
  status = PyConfig_SetString(&config, &config.base_executable, L"torch_deploy");
  status = PyConfig_SetString(&config, &config.base_prefix, L"");
  status = PyConfig_SetString(&config, &config.exec_prefix, L"");
  status = PyConfig_SetString(&config, &config.executable, L"torch_deploy");
  status = PyConfig_SetString(&config, &config.prefix, L"");

  config.module_search_paths_set = 1;
  std::array<wchar_t*, 0> module_search_paths = {};
  status = PyConfig_SetWideStringList(
      &config, &config.module_search_paths, 0, module_search_paths.data());

  status = Py_InitializeFromConfig(&config);
  PyConfig_Clear(&config);
  TORCH_INTERNAL_ASSERT(!PyStatus_Exception(status))

  // Uncomment to debug python config
  // PyRun_SimpleString(sysprint);

  PyRun_SimpleString(finder);
  // Release the GIL that PyInitialize acquires
  PyEval_SaveThread();
}

void teardown() {
  PyGILState_Ensure();

  if (Py_FinalizeEx() < 0) {
    std::cout << "IT BROKE SO WE ARE EXITING\n";
    exit(120);
  }
  PyMem_RawFree(program);
}

__attribute__((destructor)) void deinit() {}

void run_some_python(const char* code) {
  PyGILState_STATE gstate = PyGILState_Ensure();

  if (PyRun_SimpleString(code) == -1) {
    throw std::runtime_error("python eval failed\n");
  }
  PyGILState_Release(gstate);
}
void run_python_file(const char* code) {
  PyGILState_STATE gstate = PyGILState_Ensure();

  // Guard against a missing file; PyRun_SimpleFile would crash on a null FILE*.
  FILE* f = fopen(code, "r");
  if (f == nullptr) {
    throw std::runtime_error("could not open python file\n");
  }
  if (PyRun_SimpleFile(f, code) == -1) {
    fclose(f);
    throw std::runtime_error("python eval failed\n");
  }
  fclose(f);

  PyGILState_Release(gstate);
}
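// load_model runs Python inside the embedded interpreter to bind a global
// 'model': either by unpickling a torch.package archive (hermetic) or via
// torch.jit.load on the given file. It returns a fresh id from a global
// counter; forward_model below currently looks the model up via the 'model'
// global rather than by this id.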
size_t load_model(const char* filename, bool hermetic) {
  PyGILState_STATE gstate = PyGILState_Ensure();
  TORCH_INTERNAL_ASSERT(PyGILState_Check() == 1);
  std::string code;

  if (hermetic) {
    code = fmt::format(R"(
from torch.package import PackageImporter

i = PackageImporter('{}')
model = i.load_pickle('model', 'model.pkl')
)", filename);
  } else {
    code = std::string("model = torch.jit.load('") +
        std::string(filename) + std::string("')");
  }
  py::exec(code);

  auto id = ++s_id;

  PyGILState_Release(gstate);
  return id;
}

at::Tensor forward_model(size_t model_id, at::Tensor const& input) {
  at::Tensor output;
  PyGILState_STATE gstate = PyGILState_Ensure();
  {
    TORCH_INTERNAL_ASSERT(PyGILState_Check() == 1);
    auto forward = py::globals()["model"].attr("forward");

    py::object py_output = forward(input);
    // TODO is this going to leak?
    // added it to prevent crash when using 'output' tensor in callee of
    // forward()
    py_output.inc_ref();
    output = py::cast<at::Tensor>(py_output);
  }

  PyGILState_Release(gstate);

  return output;
  // return input;
}