# @lint-ignore-every BUCKLINT suppress the warning for using native
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:cpp_python_extension.bzl", "cpp_python_extension")
load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
load("@fbsource//tools/build_defs:glob_defs.bzl", "glob")
load(
    "//caffe2:build_variables.bzl",
    "glob_libtorch_python_sources",
    "libtorch_cuda_sources",
    "libtorch_nvfuser_generated_headers",
    "libtorch_nvfuser_runtime_sources",
    "libtorch_python_cuda_sources",
    "libtorch_sources",
    "torch_cpp_srcs",
)
load(
    "//caffe2:defs_hip.bzl",
    "get_hip_flags",
    "hip_external_deps",
    "hip_pp_flags",
)
load("//caffe2/caffe2/fb:defs_gpu.bzl", "gpu_library_selector", "gpu_library_targets", "is_amd_build")
load("//tools/build/buck:nccl_deps.bzl", "get_nccl_dependency")
def _path_to_filename(fname):
    return paths.split_extension(paths.basename(fname))[0]
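# Kineto (the profiler backend) is only built on x86-64 Linux hosts, and never
# for AMD/HIP builds.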
def use_kineto():
    return native.host_info().os.is_linux and native.host_info().arch.is_x86_64 and not is_amd_build()
def add_torch_libs():
    r = {}
    torch_cpp_headers = glob(["torch/csrc/api/include/**/*.h"]) + ["torch/script.h"]
    libtorch_python_sources = glob_libtorch_python_sources()
    use_mpi = native.read_config("fbcode", "caffe2_use_mpi", None)
    enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))
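    # These read_config() values come from .buckconfig or the command line,
    # so e.g. passing `-c fbcode.caffe2_enable_flatbuffer=1` to buck should
    # enable the flatbuffer targets below.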
    compiler_flags_cpu = [
        "-DUSE_C10D",
        "-DUSE_NUMPY",
        "-DUSE_SCALARS",
        "-DNO_CUDNN_DESTROY_HANDLE",
        "-DBUILD_CAFFE2",
        "-DTORCH_ENABLE_LLVM",
        "-Wno-write-strings",
        "-Wno-format",
        "-Wno-strict-aliasing",
        "-Wno-non-virtual-dtor",
        "-Wno-shadow-compatible-local",
        "-Wno-empty-body",
    ] + ([] if native.host_info().os.is_windows else [
        # XNNPACK depends on an updated version of the pthreadpool interface,
        # whose implementation includes <pthread.h> - a header not available
        # on Windows.
        "-DUSE_XNNPACK",
    ])
    # Ideally the -D defines above would be preprocessor flags here rather
    # than compiler_flags.
    propagated_pp_flags_cpu = [
        "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
        "-DUSE_DISTRIBUTED",
        "-DUSE_C10D_GLOO",
        "-DUSE_RPC",
        "-DUSE_TENSORPIPE",
    ] + (
        ["-DUSE_C10D_MPI"] if use_mpi else []
    ) + (
        ["-DUSE_KINETO", "-DUSE_KINETO_UPDATED"] if use_kineto() else []
    ) + (
        ["-DENABLE_LIBKINETO_CLIENT"] if native.read_config("kineto", "enable_libkineto_client", "1") == "1" else []
    )
    compiler_flags_cuda = [
        "-DUSE_CUDNN",
        "-DUSE_NCCL",
    ]
    compiler_flags_hip = []
    propagated_pp_flags_cuda = [
        "-DUSE_CUDA",
        "-DUSE_C10D_NCCL",
    ]
    common_headers = glob([
        "torch/csrc/**/*.h",
        # c10d used to be a separate library whose includes ended in .hpp.
        "torch/csrc/distributed/c10d/*.hpp",
        "torch/csrc/generic/*.cpp",
    ]) + [
        "torch/csrc/deploy/Exception.h",
        "torch/csrc/deploy/deploy.h",
        "torch/csrc/deploy/elf_file.h",
        "torch/csrc/deploy/environment.h",
        "torch/csrc/deploy/interpreter/builtin_registry.h",
        "torch/csrc/deploy/interpreter/interpreter_impl.h",
        "torch/csrc/deploy/loader.h",
        "torch/csrc/deploy/mem_file.h",
        "torch/csrc/deploy/noop_environment.h",
        "torch/csrc/deploy/path_environment.h",
        "torch/csrc/deploy/unity/tests/test_unity.h",
        "torch/csrc/deploy/unity/xar_environment.h",
        "torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h",
        "test/cpp/jit/test_custom_class_registrations.h",
        "test/cpp/jit/test_utils.h",
        "test/cpp/tensorexpr/gtest_assert_float_eq.h",
        "test/cpp/tensorexpr/padded_buffer.h",
        "test/cpp/tensorexpr/test_base.h",
        "test/cpp/tensorexpr/test_utils.h",
    ]
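    # Drop the checked-in flatbuffer-generated header from the globbed list,
    # presumably because when flatbuffer support is enabled it is supplied by
    # the //caffe2:mobile_bytecode target instead (see exported_deps below).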
    common_headers.remove("torch/csrc/jit/serialization/mobile_bytecode_generated.h")
    common_flags = {
        "compiler_specific_flags": {
            "clang": [
                "-Wno-absolute-value",
                "-Wno-expansion-to-defined",
                "-Wno-pessimizing-move",
                "-Wno-return-type-c-linkage",
                "-Wno-unknown-pragmas",
            ],
        },
        "headers": common_headers,
    }
    include_directories = [
        "..",
        ".",
        "torch/csrc/api/include",
        "torch/csrc",
        # c10d used to be a separate library and its includes were c10d/Foo.hpp,
        # hence we now need this hack to keep supporting them.
        "torch/csrc/distributed",
        "torch/csrc/nn",
    ]
    _libtorch_sources = list(libtorch_sources())

    # Add the Gloo and TensorPipe backends specific to Facebook networking.
    _libtorch_sources.append("torch/csrc/distributed/c10d/fb/GlooDeviceFactory.cpp")
    _libtorch_sources.append("torch/csrc/distributed/rpc/fb/tensorpipe_agent.cpp")
    cpp_library(
        name = "libtorch",
        srcs = _libtorch_sources + ([
            "torch/csrc/jit/serialization/flatbuffer_serializer.cpp",
            "torch/csrc/jit/serialization/flatbuffer_serializer_jit.cpp",
            "torch/csrc/jit/mobile/flatbuffer_loader.cpp",
        ] if enable_flatbuffer else []),
        link_whole = True,
        include_directories = include_directories,
        propagated_pp_flags = propagated_pp_flags_cpu + (["-DENABLE_FLATBUFFER"] if enable_flatbuffer else []),
        exported_deps = (
            [
                ":ATen-cpu",
                ":generated-autograd-headers",
                ":generated-lazy-headers",
                "//caffe2:version_cpp",
                "//caffe2/caffe2:caffe2_cpu",
                "//caffe2/caffe2/quantization/server:dnnlowp_ops",
                "//caffe2/caffe2/serialize:inline_container",
                "//caffe2/torch/lib/libshm:libshm",
                "//gloo:gloo",
                "//gloo/fb/transport/tls:tls",
                "//gloo/transport/tcp:tcp",
                "//tensorpipe:tensorpipe_cpu",
            ] + (["//kineto/libkineto:kineto"] if use_kineto() else []) +
            (["//caffe2:mobile_bytecode"] if enable_flatbuffer else [])
        ),
        exported_external_deps = [
            ("nanopb", None, "protobuf-nanopb"),
            ("protobuf", None),
            ("llvm-fb", None, "LLVMAnalysis"),
            ("llvm-fb", None, "LLVMBPFAsmParser"),
            ("llvm-fb", None, "LLVMBPFCodeGen"),
            ("llvm-fb", None, "LLVMCodeGen"),
            ("llvm-fb", None, "LLVMCore"),
            ("llvm-fb", None, "LLVMExecutionEngine"),
            ("llvm-fb", None, "LLVMIRReader"),
            ("llvm-fb", None, "LLVMInstCombine"),
            ("llvm-fb", None, "LLVMInterpreter"),
            ("llvm-fb", None, "LLVMMC"),
            ("llvm-fb", None, "LLVMNVPTXCodeGen"),
            ("llvm-fb", None, "LLVMOrcJIT"),
            ("llvm-fb", None, "LLVMRISCVAsmParser"),
            ("llvm-fb", None, "LLVMRISCVCodeGen"),
            ("llvm-fb", None, "LLVMScalarOpts"),
            ("llvm-fb", None, "LLVMSupport"),
            ("llvm-fb", None, "LLVMTarget"),
            ("llvm-fb", None, "LLVMTransformUtils"),
            ("llvm-fb", None, "LLVMVectorize"),
            ("llvm-fb", None, "LLVMWebAssemblyAsmParser"),
            ("llvm-fb", None, "LLVMWebAssemblyCodeGen"),
            ("llvm-fb", None, "LLVMWebAssemblyInfo"),
            ("llvm-fb", None, "LLVMX86AsmParser"),
            ("llvm-fb", None, "LLVMX86CodeGen"),
            ("llvm-fb", None, "LLVMipo"),
        ] + ([("openmpi", None, "openmpi")] if use_mpi else []),
        compiler_flags = compiler_flags_cpu,
        **common_flags
    )
    # The rules below stringify the nvfuser runtime library into header files.
    python_binary(
        name = "nvfuser-stringify",
        srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
        base_module = "",
        main_module = "torch.csrc.jit.codegen.cuda.tools.stringify_file",
    )
    # Files in libtorch_nvfuser_runtime_sources that violate package boundaries
    # are mapped to their corresponding export_file rules.
    violation_paths_to_rule = {
        "aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh": ":aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh",
        "aten/src/ATen/cuda/detail/UnpackRaw.cuh": ":aten/src/ATen/cuda/detail/UnpackRaw.cuh",
    }
    for name in libtorch_nvfuser_runtime_sources:
        src_path = violation_paths_to_rule.get(name, name)
        filename = _path_to_filename(src_path)
        native.genrule(
            name = "gen-nvfuser-hdr={}.h".format(filename),
            srcs = {name: src_path},
            bash = "$(exe :nvfuser-stringify) -i $SRCDIR/{} -o $OUT".format(name),
            out = "{}.h".format(filename),
        )
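    # For example, the runtime source "aten/src/ATen/cuda/detail/UnpackRaw.cuh"
    # yields a rule named ":gen-nvfuser-hdr=UnpackRaw.h" whose output file is
    # "UnpackRaw.h".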
    cpp_library(
        name = "generated-nvfuser-headers",
        headers = [":gen-nvfuser-hdr=" + x for x in libtorch_nvfuser_generated_headers],
        header_namespace = "nvfuser_resources",
    )
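    # Because of header_namespace, consumers include the stringified sources as,
    # e.g., #include <nvfuser_resources/UnpackRaw.h> (file name used for
    # illustration); each such header is expected to expose the runtime source
    # as a string constant.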
    _libtorch_cuda_sources = list(libtorch_cuda_sources)
    cpp_library(
        name = "libtorch_cuda",
        srcs = _libtorch_cuda_sources,
        link_whole = True,
        include_directories = include_directories,
        # TODO: putting USE_CUDA in propagated_pp_flags is error-prone
        propagated_pp_flags = propagated_pp_flags_cuda,
        exported_deps = [
            ":ATen",
            ":generated-aten-headers-cuda",
            ":generated-autograd-headers",
            ":generated-nvfuser-headers",
            ":libtorch",
            "//caffe2/caffe2:caffe2_cpu",
            "//caffe2/caffe2:caffe2_gpu",
            "//caffe2/torch/lib/libshm:libshm",
            "//gloo:gloo_gpu_cuda",
            "//tensorpipe:tensorpipe_cuda",
        ],
        exported_external_deps = [
            ("cudnn", None, "cudnn-lazy"),
            ("cuda", None, "nvToolsExt-lazy"),
            ("cuda", None, "nvrtc-lazy"),
            ("cuda", None, "nvrtc-builtins-lazy"),
        ] + get_nccl_dependency(),
        compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
        **common_flags
    )
    libtorch_hip_headers_filter = torch_cpp_headers + [h for h in common_headers if any([h.startswith(d) for d in [
        # Headers in the following directories are added to
        # libtorch_hip_headers_filter so that they are not hipified.
        "torch/csrc/deploy/",
        "torch/csrc/distributed/rpc/metrics/",
        "torch/csrc/jit/serialization/",
        "test/cpp/jit/",
        "test/cpp/tensorexpr/",
    ]])]
    # (original_paths, hipified_paths)
    libtorch_hip_sources = (libtorch_cuda_sources, [f.replace(".cu", ".hip") for f in libtorch_cuda_sources])
    libtorch_hip_headers = ([f for f in common_headers if f not in libtorch_hip_headers_filter],) * 2
    custom_rule(
        name = "fb_libtorch_hipify_gen",
        srcs = libtorch_hip_sources[0] + libtorch_hip_headers[0],
        build_args = "--source-dir= --hipify-dir= --copy-dir= --rewrite-cu-ext",
        build_script_dep = "//caffe2:fb_caffe2_hipify",
        output_gen_files = libtorch_hip_sources[1] + libtorch_hip_headers[1],
    )
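    # The hipify script rewrites the CUDA sources for ROCm: with
    # --rewrite-cu-ext each .cu source comes out renamed to .hip (matching
    # libtorch_hip_sources[1] above), while the non-filtered headers are
    # hipified in place under their original names.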
    cpp_library(
        name = "libtorch_hip_headers",
        headers = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_headers[1]],
        header_namespace = "",
    )
    cpp_library(
        name = "libtorch_hip",
        srcs = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_sources[1]],
        headers = [f for f in common_headers if f in libtorch_hip_headers_filter],
        link_whole = True,
        propagated_pp_flags = hip_pp_flags,
        exported_deps = [
            ":generated-aten-headers-hip",
            ":generated-autograd-headers",
            ":generated-nvfuser-headers",
            ":libtorch",
            ":libtorch_hip_headers",
            "//caffe2:ATen-hip",
            "//caffe2/caffe2:caffe2_cpu",
            "//caffe2/caffe2:caffe2_gpu_hip",
            "//caffe2/torch/lib/libshm:libshm",
            "//gloo:gloo_gpu_hip",
            "//tensorpipe:tensorpipe_cpu",  # TODO: include a HIP version once it's developed
        ],
        exported_external_deps = hip_external_deps,
        compiler_flags = compiler_flags_cpu + compiler_flags_hip + [
            "-Wno-unused-result",
        ],
        hip_flags = ["-Wno-unused-result"] + get_hip_flags(),
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )
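    # gpu_library_targets and gpu_library_selector (from defs_gpu.bzl) are
    # expected to pick the CPU, CUDA, or HIP dependency set that matches the
    # build's GPU flavor.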
    gpu_library_targets(
        name = "libtorch_gpu",
        deps_cpu = [
            ":libtorch",
        ],
        deps_cuda = [
            ":libtorch_cuda",
        ],
        deps_hip = [
            ":libtorch_hip",
        ],
        exclude_hip_target = False,
        extra_external_deps = [],
    )
    # torch-cpp is still conditionally compiled based on USE_CUDA. Ideally we'd
    # separate it out as an additive library instead.
    gpu_library_selector(
        name = "torch-cpp",
        deps_cpu = [":torch-cpp-cpu"],
        deps_cuda = [":torch-cpp-cuda"],
        deps_hip = [":torch-cpp-hip"],
        merge_cpu_deps = False,
        exclude_hip_target = False,
    )
    # The USE_CUDA flag is propagated through propagated_pp_flags on libtorch_cuda.
    cpp_library(
        name = "torch-cpp-cuda",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch_cuda",
            "//caffe2/torch/fb/init:init",
        ],
        exported_external_deps = [
            ("cuda", None, "cuda-lazy"),
            ("cudnn", None, "cudnn-lazy"),
        ],
    )
    cpp_library(
        name = "torch-cpp-hip",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch_hip",
            "//caffe2/torch/fb/init:init",
        ],
        exported_external_deps = hip_external_deps,
    )
    cpp_library(
        name = "torch-cpp-cpu",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch",
            "//caffe2/torch/fb/init:init",
        ],
    )
    # _C_impl is still conditionally compiled based on USE_CUDA. Ideally we'd
    # separate it out as an additive library instead.
    # TODO: split it into cpp and cuda parts similarly to libtorch
    gpu_library_selector(
        name = "_C_impl",
        deps_cpu = [":_C_impl_cpu"],
        deps_cuda = [":_C_impl_cuda"],
        deps_hip = [":_C_impl_hip"],
        merge_cpu_deps = False,
        exclude_hip_target = False,
    )
    cpp_library(
        name = "_C_impl_cpu",
        srcs = libtorch_python_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-cpu",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu,
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )
    # This target is used to help get headers for compile-time deps for the
    # torch::deploy libinterpreter.so build _without_ getting link-time deps,
    # which are supplied separately by the application that dlopens
    # libinterpreter.so.
    #
    # We make use of the buck auto-generated #headers flavor of a target to
    # accomplish this.
    #
    # However, since the #headers flavor of a target with srcs can't be used in
    # all build modes, we work around this limitation by using this
    # 'pass-through' target, which has a usable #headers flavor in all build
    # modes.
    cpp_library(
        name = "headers_for_torch_python_deps",
        exported_deps = [
            ":_C_impl_cpu",
        ],
    )
    cpp_library(
        name = "headers_for_torch_python_cuda_deps",
        exported_deps = [
            ":_C_impl_cuda",
        ],
    )
    # This target compiles the torch_python bindings, but skips the deps on the
    # actual torch and python libraries, since those are integrated specially
    # in the wrapper for the libinterpreter.so used by torch::deploy.
    cpp_library(
        name = "torch_python_without_torch",
        srcs = libtorch_python_sources + torch_cpp_srcs,
        undefined_symbols = True,
        preferred_linkage = "static",
        exported_deps = [
            ":headers_for_torch_python_deps#headers",
        ],
        exported_external_deps = [
            ("pybind11", None),
            ("frozenpython", None, "python-headers"),
        ],
        compiler_flags = compiler_flags_cpu + [
            # Some code in the Python bindings compiles differently when
            # building for torch::deploy.
            "-DUSE_DEPLOY",
        ],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )
    cpp_library(
        name = "torch_python_cuda_without_torch",
        srcs = libtorch_python_sources + torch_cpp_srcs + libtorch_python_cuda_sources,
        undefined_symbols = True,
        preferred_linkage = "static",
        exported_deps = [
            ":headers_for_torch_python_cuda_deps#headers",
        ],
        exported_external_deps = [
            ("pybind11", None),
            ("frozenpython", None, "python-headers"),
        ],
        compiler_flags = compiler_flags_cpu + [
            "-DUSE_CUDA",
            # Some code in the Python bindings compiles differently when
            # building for torch::deploy.
            "-DUSE_DEPLOY",
        ],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )
    cpp_library(
        name = "_C_impl_cuda",
        srcs = libtorch_python_sources + libtorch_python_cuda_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-cuda",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )
    # Autogenerated files, whose "rules" contain ":", are not hipified; all
    # other sources are run through the hipify script.
    libtorch_python_hip_sources = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" in f]
    libtorch_python_hip_sources_hipified = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" not in f]
    custom_rule(
        name = "fb_C_impl_hipify_gen",
        srcs = libtorch_python_hip_sources_hipified,
        build_args = "--source-dir= --hipify-dir= --copy-dir=",
        build_script_dep = "//caffe2:fb_caffe2_hipify",
        output_gen_files = libtorch_python_hip_sources_hipified,
    )
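    # Unlike fb_libtorch_hipify_gen above, no --rewrite-cu-ext is passed here:
    # these are .cpp files, so they are hipified in place and keep their names
    # (hence srcs and output_gen_files are the same list).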
    cpp_library(
        name = "_C_impl_hip",
        srcs = [":fb_C_impl_hipify_gen={}".format(f) for f in libtorch_python_hip_sources_hipified] + libtorch_python_hip_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-hip",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu + compiler_flags_hip + ["-Wno-unused-result"],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )
    cpp_python_extension(
        name = "_C",
        srcs = [
            "torch/csrc/stub.c",
        ],
        base_module = "torch",
        deps = [
            ":_C_impl",
            "//caffe2:flatbuffer_loader",
        ],
    )
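    # torch/csrc/stub.c is essentially just the module entry point: it defines
    # PyInit__C, which delegates to the initialization code linked in from
    # :_C_impl, where the actual bindings live.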
    cpp_python_extension(
        name = "_C_flatbuffer",
        srcs = [
            "torch/csrc/stub_with_flatbuffer.c",
            "torch/csrc/init_flatbuffer_module.cpp",
        ],
        base_module = "torch",
        deps = [
            ":_C_impl",
            "//caffe2:flatbuffer_loader",
            "//caffe2:flatbuffer_serializer",
        ],
    )

    return r