pytorch/torch/csrc/deploy/interpreter/defs.bzl

load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
# @lint-ignore-every BUCKLINT
load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
def embedded_interpreter(name, suffix, legacy = False, exported_deps = [], exported_external_deps = []):
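    """Builds the embedded torch::deploy interpreter as a linkable library.

    For each supported fbcode platform this compiles a self-contained
    interpreter .so, converts it into a static archive whose contents are an
    embedded binary payload, and exposes the result as `name` (plus a
    `name + "_internal"` variant without the --export-dynamic wrapper).

    `suffix` picks the flavor: "cuda" and "all" link the cuda python bindings
    and TensorRT; "all" additionally compiles import_find_sharedfuncptr.cpp,
    skips the symbol-hiding version script, and strips DT_NEEDED entries from
    the embedded .so. Any other suffix builds the cpu-only flavor. `legacy`
    keeps the older embedding layout (see the genrules below).
    """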
    final_name = name
    is_all = suffix == "all"
    is_cuda = suffix == "cuda" or is_all
    platform_static_lib = []
    for platform in ["platform009", "platform010"]:
        name = platform + "_" + final_name
        so_name = name + ".so"
        cpp_binary(
            name = so_name,
            srcs = [
                "interpreter_impl.cpp",
            ] + (["import_find_sharedfuncptr.cpp"] if is_all else []),
            headers = [
                "Optional.hpp",
                "interpreter_impl.h",
            ],
            header_namespace = "torch/csrc/deploy",
            dlopen_enabled = True,
            linker_flags = ([
                # This ensures only the intended interface symbols are public/global;
                # the rest are hidden, regardless of how they were compiled
                # (e.g. fvisibility=hidden is NOT important for the component
                # objs in this library, since we override it here.)
                "--version-script=$(location :hide_symbols.script)",
            ] if not is_all else []),
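            # For illustration only (the actual hide_symbols.script is not
            # shown in this file), a GNU ld version script that exports a
            # single factory symbol and hides everything else looks like:
            #
            #   { global: newInterpreterImpl; local: *; };
            #
            # where newInterpreterImpl stands in for whatever interface
            # symbol(s) the real script lists.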
            deps = [
                "fbsource//third-party/fmt:fmt",
            ] + ([
                ":builtin_registry_cuda",
                "//caffe2:torch_python_cuda_without_torch",
                "//deeplearning/trt/python:frozen_tensorrt",
            ] if is_cuda else [
                ":builtin_registry",
                "//caffe2:torch_python_without_torch",
            ]),
            external_deps = [
                # needed for interpreter.cpp itself, since it currently uses pybind
                ("frozenpython", None, "python-frozen"),
                ("frozenpython", None, "python"),
            ],
            fbcode_platform = platform,
        )
        # We build torch::deploy with two embedded binaries: one with only cpu py bindings,
        # the other with cpu+cuda py bindings. This unfortunately wastes some binary size,
        # but at least at runtime only one of them is loaded.
        #
        # This is done for two reasons:
        # (1) applications such as predictor want to depend on torch::deploy in a
        # cuda-agnostic way, i.e. they don't choose yet, and a binary/app that depends
        # on predictor either chooses to include or not include a dep on cuda.
        #
        # (2) the way the embedded binary is created and loaded, it only exposes a small
        # set of interface symbols globally, for creating a new interpreter, and hides its
        # other symbols (esp. python ones) so they don't conflict with other interpreters.
        # This prevents dividing the cpu and cuda portions of bindings into _separate_ libs
        # and loading the cuda part additively. Hence, to achieve requirement (1), we bundle
        # two complete interpreter libs, one with and one without cuda.
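        # For the "all" flavor, the built .so is copied through the
        # remove_dt_needed helper instead of plain cp; as the tool's name
        # suggests, this strips the DT_NEEDED entries from the copied library
        # so the embedded interpreter does not pull in its own dynamic
        # dependencies when it is loaded.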
        cp_cmd = "$(location //caffe2/torch/csrc/deploy:remove_dt_needed)" if suffix == "all" else "cp"
        build_name = "build_" + name
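        # The genrule below turns the interpreter .so into a linkable static
        # archive: `ld -r -b binary` wraps the raw bytes of the .so in a
        # relocatable object (which exposes _binary_*_start/_end symbols),
        # `objcopy` moves those bytes from .data into a dedicated read-only
        # .torch_deploy_payload.* section and strips the start/end symbols,
        # and `ar` packs the object into the output archive. The legacy
        # variant skips the objcopy step, keeping the payload in .data with
        # its start/end symbols visible.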
        if not legacy:
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_" + suffix + ".a",
                cmd = """\
""" + cp_cmd + """ $(location :""" + so_name + """) libtorch_deployinterpreter_internal_""" + suffix + """.so
ld -r -b binary -o ${TMP}/embedded_interpreter_""" + suffix + """.o libtorch_deployinterpreter_internal_""" + suffix + """.so
objcopy --rename-section .data=.torch_deploy_payload.interpreter_""" + suffix + """,readonly,contents -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_start -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_end ${TMP}/embedded_interpreter_""" + suffix + """.o
ar rcs ${OUT} ${TMP}/embedded_interpreter_""" + suffix + """.o
""",
            )
        else:
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_cuda_legacy.a",
                cmd = """\
cp $(location :""" + so_name + """) libtorch_deployinterpreter_cuda.so
ld -r -b binary -o ${TMP}/embedded_interpreter_cuda.o libtorch_deployinterpreter_cuda.so
ar rcs ${OUT} ${TMP}/embedded_interpreter_cuda.o
""",
            )
        platform_static_lib.append(["^" + platform, ":" + build_name])

    internal_name = final_name + "_internal"
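    # Bundle the per-platform archives into one prebuilt library. Each
    # platform_static_lib entry pairs a platform regex (e.g. "^platform009")
    # with the archive built for that platform, so the matching one is chosen
    # at link time. link_whole = True forces the archive members to be linked
    # in even though nothing references the embedded payload by symbol.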
    fb_native.prebuilt_cxx_library(
        preferred_linkage = "static",
        name = internal_name,
        visibility = ["PUBLIC"],
        link_whole = True,
        platform_static_lib = platform_static_lib,
    )
    # A thin wrapper around the `_internal` library above that adds the
    # --export-dynamic linker flag, which is propagated to any cpp_binary that
    # links against this target. cpp_binary targets therefore no longer need
    # to enable --export-dynamic explicitly; new use cases usually forgot to
    # do so, which caused an "interpreter not found" crash at runtime.
    cpp_library(
        name = final_name,
        linker_flags = [
            "--export-dynamic",
        ],
        exported_deps = [
            ":" + internal_name,
        ] + exported_deps,
        exported_external_deps = exported_external_deps,
    )
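
# Illustrative call site (hypothetical target name and load path; real usage
# lives in the BUCK files that load this macro):
#
#   load("//caffe2/torch/csrc/deploy/interpreter:defs.bzl", "embedded_interpreter")
#
#   embedded_interpreter(
#       name = "embedded_interpreter_cuda",
#       suffix = "cuda",
#   )
#
# This defines :embedded_interpreter_cuda (with --export-dynamic applied) and
# :embedded_interpreter_cuda_internal for callers that manage the linker
# flags themselves.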