mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[BE] Remove torch deploy | remove torch deploy specific files (#158290)
This PR removes specific files found in pytorch which are only used for torch::deploy. This is mostly testing code and a debugger. Pull Request resolved: https://github.com/pytorch/pytorch/pull/158290 Approved by: https://github.com/albanD ghstack dependencies: #158288
This commit is contained in:
@ -1,8 +0,0 @@
|
||||
---
|
||||
orphan: true
|
||||
---
|
||||
|
||||
# torch::deploy has been moved to pytorch/multipy <!-- codespell:ignore -->
|
||||
|
||||
|
||||
``torch::deploy`` has been moved to its new home at [https://github.com/pytorch/multipy](https://github.com/pytorch/multipy). <!-- codespell:ignore -->
|
@ -1,43 +0,0 @@
|
||||
# Owner(s): ["oncall: package/deploy"]
|
||||
|
||||
import textwrap
|
||||
import types
|
||||
|
||||
from torch.testing._internal.common_utils import run_tests, TestCase
|
||||
from torch.utils._freeze import Freezer, PATH_MARKER
|
||||
|
||||
|
||||
class TestFreezer(TestCase):
    """Unit tests for the freeze.py script (torch.utils._freeze)."""

    def test_compile_string(self):
        """Every code object produced by compile_string — including nested
        ones — must carry PATH_MARKER as its filename, so that no build-time
        path leaks into the frozen bytecode."""
        freezer = Freezer(True)
        source = textwrap.dedent(
            """
            class MyCls:
                def __init__(self) -> None:
                    pass
            """
        )
        top_code = freezer.compile_string(source)

        seen = 0
        # Walk the code-object tree iteratively instead of recursively.
        pending = [top_code]
        while pending:
            candidate = pending.pop()
            if not isinstance(candidate, types.CodeType):
                continue
            self.assertEqual(PATH_MARKER, candidate.co_filename)
            seen += 1
            pending.extend(candidate.co_consts)

        # there is at least one nested code object besides the top level one
        self.assertTrue(seen >= 2)
|
||||
|
||||
|
||||
# Standard PyTorch test entry point: run_tests dispatches this file's
# TestCase subclasses through the common test harness.
if __name__ == "__main__":
    run_tests()
|
@ -1,38 +0,0 @@
|
||||
import lldb  # type: ignore[import]


# lldb helper for debugging torch::deploy: every time an embedded interpreter
# registers a code object (by hitting the `__deploy_register_code` symbol),
# the callback below extracts the in-memory shared object, dumps it to a temp
# file, and tells lldb to load its symbols at the correct address.
#
# load into lldb instance with:
# command script import tools/lldb/deploy_debugger.py

target = lldb.debugger.GetSelectedTarget()
# Break on the registration hook exported by the deploy runtime.
bp = target.BreakpointCreateByRegex("__deploy_register_code")
# NOTE(review): the callback body reads a struct of four pointers laid out at
# `__deploy_module_info` (name ptr, file ptr, file size, load bias) — assumes
# a 64-bit target (ptr_size = 8).
bp.SetScriptCallbackBody(
    """\
process = frame.thread.GetProcess()
target = process.target
symbol_addr = frame.module.FindSymbol("__deploy_module_info").GetStartAddress()
info_addr = symbol_addr.GetLoadAddress(target)
e = lldb.SBError()
ptr_size = 8
str_addr = process.ReadPointerFromMemory(info_addr, e)
file_addr = process.ReadPointerFromMemory(info_addr + ptr_size, e)
file_size = process.ReadPointerFromMemory(info_addr + 2*ptr_size, e)
load_bias = process.ReadPointerFromMemory(info_addr + 3*ptr_size, e)
name = process.ReadCStringFromMemory(str_addr, 512, e)
r = process.ReadMemory(file_addr, file_size, e)
from tempfile import NamedTemporaryFile
from pathlib import Path
stem = Path(name).stem
with NamedTemporaryFile(prefix=stem, suffix='.so', delete=False) as tf:
    tf.write(r)
    print("torch_deploy registering debug information for ", tf.name)
    cmd1 = f"target modules add {tf.name}"
    # print(cmd1)
    lldb.debugger.HandleCommand(cmd1)
    cmd2 = f"target modules load -f {tf.name} -s {hex(load_bias)}"
    # print(cmd2)
    lldb.debugger.HandleCommand(cmd2)

return False
"""
)
|
104
torch/_deploy.py
104
torch/_deploy.py
@ -1,104 +0,0 @@
|
||||
# mypy: allow-untyped-defs
|
||||
import io
|
||||
|
||||
import torch
|
||||
from torch.package import Importer, OrderedImporter, PackageImporter, sys_importer
|
||||
from torch.package._package_pickler import create_pickler
|
||||
from torch.package._package_unpickler import PackageUnpickler
|
||||
from torch.serialization import _maybe_decode_ascii
|
||||
|
||||
|
||||
def _save_storages(importer, obj):
|
||||
serialized_storages = []
|
||||
serialized_dtypes = []
|
||||
|
||||
importer = importer if isinstance(importer, torch.package.PackageImporter) else None
|
||||
importers: Importer
|
||||
if importer is not None:
|
||||
importers = OrderedImporter(importer, sys_importer)
|
||||
else:
|
||||
importers = sys_importer
|
||||
|
||||
def persistent_id(obj):
|
||||
if torch.is_storage(obj) or isinstance(obj, torch.storage.TypedStorage):
|
||||
if isinstance(obj, torch.storage.TypedStorage):
|
||||
# TODO: Once we decide to break serialization FC, we can
|
||||
# remove this case
|
||||
dtype = obj.dtype
|
||||
else:
|
||||
dtype = torch.uint8
|
||||
|
||||
serialized_storages.append(obj)
|
||||
serialized_dtypes.append(dtype)
|
||||
return ("storage", len(serialized_storages) - 1)
|
||||
|
||||
if hasattr(obj, "__reduce_deploy__"):
|
||||
if _serialized_reduces.get(id(obj)) is None:
|
||||
_serialized_reduces[id(obj)] = (
|
||||
"reduce_deploy",
|
||||
id(obj),
|
||||
*obj.__reduce_deploy__(importers),
|
||||
)
|
||||
return _serialized_reduces[id(obj)]
|
||||
|
||||
return None
|
||||
|
||||
# Write the pickle data for `obj`
|
||||
data_buf = io.BytesIO()
|
||||
pickler = create_pickler(data_buf, importers)
|
||||
pickler.persistent_id = persistent_id
|
||||
pickler.dump(obj)
|
||||
data_value = data_buf.getvalue()
|
||||
return (
|
||||
data_value,
|
||||
serialized_storages,
|
||||
serialized_dtypes,
|
||||
importer.zip_reader if importer else None,
|
||||
)
|
||||
|
||||
|
||||
def _load_storages(id, zip_reader, obj_bytes, serialized_storages, serialized_dtypes):
    """Inverse of ``_save_storages``: unpickle ``obj_bytes``, rehydrating
    externalized storages and deploy-reduced objects, and cache the result
    in ``_deploy_objects`` under ``id``."""

    def persistent_load(saved_id):
        assert isinstance(saved_id, tuple)
        tag = _maybe_decode_ascii(saved_id[0])
        payload = saved_id[1:]

        if tag == "storage":
            # TODO: Once we decide to break serialization FC, we can
            # stop wrapping with TypedStorage
            index = payload[0]
            return torch.storage.TypedStorage(
                wrap_storage=serialized_storages[index].untyped(),
                dtype=serialized_dtypes[index],
            )

        if tag == "reduce_deploy":
            reduce_id, func, args = payload
            if reduce_id not in _loaded_reduces:
                # Rebuild once per reduce_id; later loads reuse the result.
                _loaded_reduces[reduce_id] = func(_raw_packages[zip_reader], *args)
            return _loaded_reduces[reduce_id]

        return None

    importer: Importer
    if zip_reader is None:
        importer = sys_importer
    else:
        importer = OrderedImporter(_get_package(zip_reader), sys_importer)

    unpickler = PackageUnpickler(importer, io.BytesIO(obj_bytes))
    unpickler.persistent_load = persistent_load  # type: ignore[method-assign]
    result = _deploy_objects[id] = unpickler.load()
    return result
|
||||
|
||||
|
||||
def _get_package(zip_reader):
    """Return the PackageImporter for ``zip_reader``, creating and caching it
    in ``_raw_packages`` on first use."""
    try:
        return _raw_packages[zip_reader]
    except KeyError:
        package = _raw_packages[zip_reader] = PackageImporter(zip_reader)
        return package
|
||||
|
||||
|
||||
# Module-level caches shared by _save_storages/_load_storages/_get_package.
# zip_reader -> PackageImporter, so each package archive is opened once.
_raw_packages: dict = {}
# load id -> unpickled object (result cache of _load_storages).
_deploy_objects: dict = {}
# id(obj) -> ("reduce_deploy", id, ...) tuples produced during save.
_serialized_reduces: dict = {}
# reduce_id -> reconstructed object, so each reduce is rebuilt only once.
_loaded_reduces: dict = {}
|
@ -1,2 +0,0 @@
|
||||
# torch::deploy has been moved to pytorch/multipy <!-- codespell:ignore -->
|
||||
Please check out [https://github.com/pytorch/multipy](https://github.com/pytorch/multipy) to find the new home for torch::deploy. <!-- codespell:ignore -->
|
@ -1,292 +0,0 @@
|
||||
# mypy: allow-untyped-decorators
|
||||
# mypy: allow-untyped-defs
|
||||
"""
|
||||
Freeze Python packages.
|
||||
|
||||
|
||||
|
||||
|
||||
Freezing makes it possible to ship arbitrary Python modules as part of a C++
|
||||
library. The Python source of the module is compiled to bytecode and written
|
||||
to `.c` files, to be imported by Python's built-in FrozenImporter.
|
||||
|
||||
In a normal Python installation, FrozenImporter is only used to bootstrap the
|
||||
initialization of the import machinery. Python's importers are defined in
|
||||
Python (see `_bootstrap.py` and `_bootstrap_external.py`) but need to be
|
||||
retrieved before any importers are available. Freezing the module bytecode
|
||||
resolves this circular dependency.
|
||||
|
||||
This script will freeze the Python standard library. It produces two things:
|
||||
- Bytecode files: A set of `.c` that define C variables containing Python bytecode.
|
||||
- Main file: A `main.c` file listing all of these modules in the right form to be
|
||||
consumed by FrozenImporter.
|
||||
|
||||
The library that wishes to use these modules makes them available to the local
|
||||
Python instance by extending `PyImport_FrozenModules` appropriately (see
|
||||
https://docs.python.org/3/c-api/import.html#c.PyImport_FrozenModules).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import functools
|
||||
import itertools
|
||||
import marshal
|
||||
import os
|
||||
import types
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Filename stamped onto every frozen code object so build-time paths never
# leak into the frozen bytecode (see Freezer.compile_string).
PATH_MARKER = "<Generated by torch::deploy>"
MAIN_INCLUDES = """#include <Python.h>

"""

# C source template for the frozen-module table; formatted with the symbol
# name of the `struct _frozen` array to emit.
MAIN_PREFIX_TEMPLATE = """
// Compiled standard library modules. These should be appended to the existing
// `PyImport_FrozenModules` that ships with CPython.
struct _frozen {}[] = {{
"""

# An empty stand-in table under CPython's default symbol name, emitted for
# OSS builds (see Freezer.write_main).
FAKE_PREFIX = MAIN_PREFIX_TEMPLATE.format("_PyImport_FrozenModules")

MAIN_SUFFIX = """\
{0, 0, 0} /* sentinel */
};
"""

# Exclude some standard library modules to:
# 1. Slim down the final frozen lib.
# 2. Remove functionality we don't want to support.
DENY_LIST = [
    # Interface to unix databases
    "dbm",
    # ncurses bindings (terminal interfaces)
    "curses",
    # Tcl/Tk GUI
    "tkinter",
    # Tests for the standard library
    "test",
    "tests",
    "idle_test",
    "__phello__.foo.py",
    # importlib frozen modules. These are already baked into CPython.
    "_bootstrap.py",
    "_bootstrap_external.py",
]

# Number of generated bytecode_<i>.c files the frozen modules are spread
# across (see Freezer.write_bytecode).
NUM_BYTECODE_FILES = 5
||||
|
||||
def indent_msg(fn):
    """Decorator for Freezer methods: bump ``self.indent`` for the duration
    of the call so that ``msg`` output reflects directory nesting depth."""

    @functools.wraps(fn)
    def inner(*args, **kwargs):
        receiver = args[0]
        receiver.indent += 1
        result = fn(*args, **kwargs)
        receiver.indent -= 1
        return result

    return inner
|
||||
|
||||
|
||||
@dataclass
class FrozenModule:
    """One module's frozen bytecode plus the metadata needed to emit it as a
    C variable and a `struct _frozen` table entry."""

    # The fully qualified module name, e.g. 'foo.bar.baz'
    module_name: str
    # The name of the C variable that holds the bytecode, e.g. 'M_foo__bar__baz'
    c_name: str
    # The size of the C variable. Negative if this module is a package.
    size: int
    # The frozen bytecode
    bytecode: bytes
|
||||
|
||||
|
||||
class Freezer:
    """Compiles a tree of Python source files to frozen bytecode.

    Results accumulate in ``self.frozen_modules``; ``write_bytecode`` and
    ``write_main`` then emit them as C source for FrozenImporter.
    """

    def __init__(self, verbose: bool):
        # Accumulated results of compile_file, in traversal order.
        self.frozen_modules: list[FrozenModule] = []
        # Current directory-nesting depth, managed by the @indent_msg decorator.
        self.indent: int = 0
        self.verbose: bool = verbose

    def msg(self, path: Path, code: str) -> None:
        """Print one verbose progress line, indented to the current depth.

        ``code`` is a single-letter status tag:
        """
        if not self.verbose:
            return
        # P: package dir
        # F: python file
        # S: skipped (not a package dir)
        # X: skipped (deny-listed)
        # N: skipped (not a python file)
        print(" " * self.indent, end="")
        print(f"{code} {path}")

    def write_bytecode(self, install_root) -> None:
        """
        Write the `.c` files containing the frozen bytecode.

        Share the frozen modules evenly across the files.
        """
        bytecode_file_names = [f"bytecode_{i}.c" for i in range(NUM_BYTECODE_FILES)]
        bytecode_files = [
            open(os.path.join(install_root, name), "w") for name in bytecode_file_names
        ]
        # Round-robin the modules across the output files to balance size.
        it = itertools.cycle(bytecode_files)
        for m in self.frozen_modules:
            self.write_frozen(m, next(it))

        for f in bytecode_files:
            f.close()

    def write_main(self, install_root, oss, symbol_name) -> None:
        """Write the `main.c` file containing a table enumerating all the frozen modules."""
        with open(os.path.join(install_root, "main.c"), "w") as outfp:
            outfp.write(MAIN_INCLUDES)
            # Forward-declare every bytecode array defined in the bytecode_*.c files.
            for m in self.frozen_modules:
                outfp.write(f"extern unsigned char {m.c_name}[];\n")

            outfp.write(MAIN_PREFIX_TEMPLATE.format(symbol_name))
            for m in self.frozen_modules:
                outfp.write(f'\t{{"{m.module_name}", {m.c_name}, {m.size}}},\n')
            outfp.write(MAIN_SUFFIX)
            if oss:
                # OSS builds also get an empty table under CPython's default
                # `_PyImport_FrozenModules` symbol name.
                outfp.write(FAKE_PREFIX)
                outfp.write(MAIN_SUFFIX)

    def write_frozen(self, m: FrozenModule, outfp) -> None:
        """Write a single frozen module's bytecode out to a C variable."""
        outfp.write(f"unsigned char {m.c_name}[] = {{")
        # Emit the bytes 16 per line as a comma-separated initializer.
        for i in range(0, len(m.bytecode), 16):
            outfp.write("\n\t")
            for c in bytes(m.bytecode[i : i + 16]):
                outfp.write(f"{c:d},")
        outfp.write("\n};\n")

    def compile_path(self, path: Path, top_package_path: Path) -> None:
        """Entry point for compiling a Path object."""
        if path.is_dir():
            self.compile_package(path, top_package_path)
        else:
            self.compile_file(path, top_package_path)

    @indent_msg
    def compile_package(self, path: Path, top_package_path: Path) -> None:
        """Compile all the files within a Python package dir."""
        assert path.is_dir()
        if path.name in DENY_LIST:
            self.msg(path, "X")
            return

        # Python packages are directories that have __init__.py in them.
        is_package_dir = any(child.name == "__init__.py" for child in path.iterdir())
        if not is_package_dir:
            self.msg(path, "S")
            return

        self.msg(path, "P")
        # Recursively compile all children in this dir
        for child in path.iterdir():
            self.compile_path(child, top_package_path)

    def get_module_qualname(self, file_path: Path, top_package_path: Path) -> list[str]:
        """Map a source file path to its dotted module name, as a list of parts."""
        # `path` looks like 'Lib/foo/bar/baz.py'

        # chop off 'Lib/' to get something that represents a Python module hierarchy.
        # e.g. 'foo/bar/baz.py', which maps to 'foo.bar.baz'
        normalized_path = file_path.relative_to(top_package_path.parent)

        if normalized_path.name == "__init__.py":
            # Special handling for `__init__.py`. In this case, this file
            # specifies that the containing directory should be treated as a package.
            # For 'foo/bar/baz/__init__.py':
            # - The module name is 'baz'
            module_basename = normalized_path.parent.name
            # - The parent is foo.bar (need to shave off the 'baz')
            module_parent = normalized_path.parent.parent.parts
        else:
            module_basename = normalized_path.stem
            module_parent = normalized_path.parent.parts
        return list(module_parent) + [module_basename]

    def compile_string(self, file_content: str) -> types.CodeType:
        """Compile Python source to a code object with PATH_MARKER as its filename."""
        # instead of passing in the real build time path to 'compile', we
        # pass in a marker instead. This prevents the build time path being
        # leaked to runtime. That path may not be available at runtime.
        # Setting the path to a mark make sure it's a hard error rather
        # than a flaky error when inspect module tries to retrieve python source
        # code during torchscripting.
        path_marker = PATH_MARKER
        return compile(file_content, path_marker, "exec")

    @indent_msg
    def compile_file(self, path: Path, top_package_path: Path) -> None:
        """
        Compile a Python source file to frozen bytecode.

        Append the result to `self.frozen_modules`.
        """
        assert path.is_file()
        if path.suffix != ".py":
            self.msg(path, "N")
            return

        if path.name in DENY_LIST:
            self.msg(path, "X")
            return

        self.msg(path, "F")
        module_qualname = self.get_module_qualname(path, top_package_path)
        module_mangled_name = "__".join(module_qualname)
        c_name = "M_" + module_mangled_name

        with open(path) as src_file:
            co = self.compile_string(src_file.read())

        # marshal is the serialization format FrozenImporter expects.
        bytecode = marshal.dumps(co)
        size = len(bytecode)
        if path.name == "__init__.py":
            # Python packages are signified by negative size.
            size = -size
        self.frozen_modules.append(
            FrozenModule(".".join(module_qualname), c_name, size, bytecode)
        )
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: freeze the given paths and write the generated C files."""
    parser = argparse.ArgumentParser(description="Compile py source")
    parser.add_argument("paths", nargs="*", help="Paths to freeze.")
    parser.add_argument("--verbose", action="store_true", help="Print debug logs")
    parser.add_argument(
        "--install-dir", "--install_dir", help="Root directory for all output files"
    )
    parser.add_argument(
        "--oss",
        action="store_true",
        help="If it's OSS build, add a fake _PyImport_FrozenModules",
    )
    parser.add_argument(
        "--symbol-name",
        "--symbol_name",
        help="The name of the frozen module array symbol to generate",
        default="_PyImport_FrozenModules_torch",
    )
    args = parser.parse_args()

    freezer = Freezer(args.verbose)

    for raw_path in args.paths:
        path = Path(raw_path)
        if path.is_dir() and not (path / "__init__.py").exists():
            # this 'top level path p' is a standard directory containing modules,
            # not a module itself
            # each 'mod' could be a dir containing __init__.py or .py file
            # NB: sorted to make sure this is deterministic
            for mod in sorted(path.glob("*")):
                freezer.compile_path(mod, mod)
        else:
            freezer.compile_path(path, path)

    freezer.write_bytecode(args.install_dir)
    freezer.write_main(args.install_dir, args.oss, args.symbol_name)


if __name__ == "__main__":
    main()  # pragma: no cover
|
Reference in New Issue
Block a user