load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary")
|
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
|
load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
|
|
|
|
# @lint-ignore-every BUCKLINT
|
|
load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
|
|
|
|
def embedded_interpreter(name, suffix, legacy = False, exported_deps = [], exported_external_deps = []):
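    """Define targets that embed a torch::deploy python interpreter.

    Parameter sketch (inferred from the uses below, not from upstream docs):
        name: base name for the generated targets.
        suffix: interpreter flavor tag; "cuda" or "all" pulls in the cuda py
            bindings, and "all" additionally compiles
            import_find_sharedfuncptr.cpp and strips DT_NEEDED entries from
            the embedded .so.
        legacy: if True, emit the older cuda-only genrule flavor.
        exported_deps/exported_external_deps: forwarded to the wrapping
            cpp_library.
    """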
    final_name = name
    is_all = suffix == "all"
    is_cuda = suffix == "cuda" or is_all
    platform_static_lib = []
    for platform in ["platform009", "platform010"]:
        name = platform + "_" + final_name
        so_name = name + ".so"
        cpp_binary(
            name = so_name,
            srcs = [
                "interpreter_impl.cpp",
            ] + (["import_find_sharedfuncptr.cpp"] if is_all else []),
            headers = [
                "Optional.hpp",
                "interpreter_impl.h",
            ],
            header_namespace = "torch/csrc/deploy",
            dlopen_enabled = True,
            linker_flags = ([
                # This ensures only the intended interface symbols are public/global;
                # the rest are hidden, regardless of how they were compiled
                # (e.g. -fvisibility=hidden is NOT important for the component
                # objs in this library, since we override it here).
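                # For reference, a linker version script of the sort
                # hide_symbols.script plausibly contains (a sketch; the actual
                # exported names are an assumption, not copied from the real
                # file):
                #
                #   { global: <interface symbols>; local: *; };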
                "--version-script=$(location :hide_symbols.script)",
            ] if not is_all else []),
            deps = [
                "fbsource//third-party/fmt:fmt",
            ] + ([
                ":builtin_registry_cuda",
                "//caffe2:torch_python_cuda_without_torch",
                "//deeplearning/trt/python:frozen_tensorrt",
            ] if is_cuda else [
                ":builtin_registry",
                "//caffe2:torch_python_without_torch",
            ]),
            external_deps = [
                # needed for interpreter.cpp itself; it currently uses pybind
                ("frozenpython", None, "python-frozen"),
                ("frozenpython", None, "python"),
            ],
            fbcode_platform = platform,
        )

        # We build torch::deploy with two embedded binaries: one with only cpu
        # py bindings, the other with cpu+cuda py bindings. This unfortunately
        # wastes some binary size, but at least only one of them is loaded at
        # runtime.
        #
        # This is because of two reasons:
        # (1) applications such as predictor want to depend on torch::deploy in
        # a cuda-agnostic way, i.e. they don't choose yet, and a binary/app that
        # depends on predictor chooses whether or not to include a dep on cuda.
        #
        # (2) the way the embedded binary is created and loaded, it only exposes
        # a small set of interface symbols globally (for creating a new
        # interpreter) and hides its other symbols (esp. python ones) so they
        # don't conflict with other interpreters. This prevents dividing the cpu
        # and cuda portions of the bindings into _separate_ libs and loading the
        # cuda part additively. Hence, to satisfy requirement (1), we bundle two
        # complete interpreter libs, one with and one without cuda.

        cp_cmd = "$(location //caffe2/torch/csrc/deploy:remove_dt_needed)" if suffix == "all" else "cp"

        build_name = "build_" + name
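        # The genrules below package the interpreter .so as a static archive
        # whose sole content is the raw bytes of that .so:
        #   1. copy (or remove_dt_needed) the .so next to the genrule output;
        #   2. `ld -r -b binary` wraps the raw bytes in a relocatable object
        #      with _binary_*_start/_end symbols marking the payload;
        #   3. objcopy moves the payload from .data into a dedicated readonly
        #      .torch_deploy_payload.interpreter_<suffix> section and strips
        #      (-N) the start/end symbols so multiple embedded interpreters do
        #      not collide (the legacy flavor skips this step);
        #   4. `ar rcs` packs the object into the output .a.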
        if not legacy:
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_" + suffix + ".a",
                cmd = """\
""" + cp_cmd + """ $(location :""" + so_name + """) libtorch_deployinterpreter_internal_""" + suffix + """.so
ld -r -b binary -o ${TMP}/embedded_interpreter_""" + suffix + """.o libtorch_deployinterpreter_internal_""" + suffix + """.so
objcopy --rename-section .data=.torch_deploy_payload.interpreter_""" + suffix + """,readonly,contents -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_start -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_end ${TMP}/embedded_interpreter_""" + suffix + """.o
ar rcs ${OUT} ${TMP}/embedded_interpreter_""" + suffix + """.o
""",
            )
        else:
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_cuda_legacy.a",
                cmd = """\
cp $(location :""" + so_name + """) libtorch_deployinterpreter_cuda.so
ld -r -b binary -o ${TMP}/embedded_interpreter_cuda.o libtorch_deployinterpreter_cuda.so
ar rcs ${OUT} ${TMP}/embedded_interpreter_cuda.o
""",
            )
        platform_static_lib.append(["^" + platform, ":" + build_name])

    internal_name = final_name + "_internal"
    fb_native.prebuilt_cxx_library(
        name = internal_name,
        visibility = ["PUBLIC"],
        preferred_linkage = "static",
        link_whole = True,
        platform_static_lib = platform_static_lib,
    )

    # A thin wrapper around the _internal library that adds the --export-dynamic
    # linker flag. The flag is propagated to any cpp_binary that depends on this
    # library, so we no longer require each cpp_binary to enable --export-dynamic
    # explicitly; new use cases usually forgot to do so, which caused
    # "interpreter not found" crashes.
    cpp_library(
        name = final_name,
        linker_flags = [
            "--export-dynamic",
        ],
        exported_deps = [
            ":" + internal_name,
        ] + exported_deps,
        exported_external_deps = exported_external_deps,
    )
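
# Example usage from a TARGETS/BUCK file (a sketch: the file name and target
# names below are illustrative assumptions, not taken from a real build):
#
#   load("//caffe2/torch/csrc/deploy:embedded_interpreter.bzl", "embedded_interpreter")
#
#   embedded_interpreter(
#       name = "embedded_interpreter",
#       suffix = "cpu",
#   )
#
#   embedded_interpreter(
#       name = "embedded_interpreter_all",
#       suffix = "all",
#   )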