mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Add all bzl files per D36874458
This commit is contained in:
19
android/build_defs.bzl
Normal file
19
android/build_defs.bzl
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
load("@fbsource//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test")
|
||||||
|
load("@fbsource//xplat/caffe2:pt_defs.bzl", "get_build_from_deps_query", "pt_operator_registry")
|
||||||
|
|
||||||
|
# Default operator registry dependency used by pt_xplat_cxx_test when the
# caller does not pass its own pt_op_deps: the full dev set of mobile ops.
DEFAULT_PT_OP_DEPS = [
    "fbsource//xplat/caffe2:torch_mobile_ops_full_dev",
]
|
||||||
|
|
||||||
|
def pt_xplat_cxx_test(name, deps = [], pt_op_deps = DEFAULT_PT_OP_DEPS, **kwargs):
    """Thin wrapper over fb_xplat_cxx_test for PyTorch xplat tests.

    When the build-from-deps-query mode is enabled, a pt_operator_registry
    target (named "<name>_lib") is generated from pt_op_deps and appended to
    the test's deps; otherwise deps are passed through unchanged.
    """
    if get_build_from_deps_query():
        registry_name = name + "_lib"
        pt_operator_registry(
            registry_name,
            preferred_linkage = "static",
            template_select = False,
            deps = pt_op_deps,
        )
        # Link the generated registry into the test.
        deps = deps + [":" + registry_name]
    fb_xplat_cxx_test(
        name = name,
        deps = deps,
        **kwargs
    )
|
29
c10/c10_defs.bzl
Normal file
29
c10/c10_defs.bzl
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
load("@fbsource//tools/build_defs:expect.bzl", "expect")
|
||||||
|
load(
|
||||||
|
"@fbsource//tools/build_defs/apple:build_mode_defs.bzl",
|
||||||
|
"is_production_build",
|
||||||
|
)
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Check if we need to strip glog.
|
||||||
|
def _get_strip_glog_config():
    """Read caffe2.strip_glog from buckconfig; defaults to stripping ("1")."""
    raw = native.read_config("caffe2", "strip_glog", "1")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
# For iOS production builds (and all Android builds), strip GLOG logging to
|
||||||
|
# save size. We can disable by setting caffe2.strip_glog=0 in .buckconfig.local.
|
||||||
|
def get_fbobjc_strip_glog_flags():
    """GLOG-stripping compiler flags for iOS builds.

    Production builds (and opted-in builds via caffe2.strip_glog) strip
    logging to save size; setting caffe2.strip_glog=0 in .buckconfig.local
    disables it.
    """
    should_strip = is_production_build() or _get_strip_glog_config()
    if not should_strip:
        return ["-UGOOGLE_STRIP_LOG"]
    return ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=3"]
|
||||||
|
|
||||||
|
def get_fbandroid_strip_glog_flags():
    """GLOG-stripping compiler flags for Android builds (strip by default)."""
    if not _get_strip_glog_config():
        return []
    return ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"]
|
126
c10/defs_hip.bzl
Normal file
126
c10/defs_hip.bzl
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load("//caffe2:defs_hip.bzl", "get_hip_file_path")
|
||||||
|
|
||||||
|
# Suffixes globbed as GPU translation units by get_c10_hip_srcs/test_files.
gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"]
# Suffixes globbed as GPU headers by get_c10_hip_headers.
gpu_header_extensions = [".cuh", ".h", ".hpp"]
|
||||||
|
|
||||||
|
def is_test_files(filepath):
    """Return True when *filepath* is under the test folder.

    Args:
        filepath: repo-relative path string.

    Returns:
        True iff the path starts with "test".
    """
    # Idiom fix: the predicate is already a bool; no if/else needed.
    return filepath.startswith("test")
|
||||||
|
|
||||||
|
def get_c10_hip_srcs():
    """Collect c10 GPU sources and their hipified output paths.

    Returns a (gpu_files, real_hip_files) pair: the source files to feed to
    hipify, and the matching generated file paths prefixed with "c10".
    """
    patterns = []
    for base in c10_includes:
        for suffix in gpu_file_extensions:
            patterns.append(base + suffix)

    # Exclude the test folder.
    kept = [f for f in native.glob(patterns) if not is_test_files(f)]
    gpu_files = list(kept)
    hip_files = [get_hip_file_path(paths.join("cuda/", f)) for f in kept]

    # Native .hip files only need their path carried through unchanged.
    for native_hip in native.glob(["hip/**/*.hip"]):
        gpu_files.append(native_hip)
        hip_files.append(native_hip)

    # The hipify script runs under the caffe2 folder, so prepend "c10" so
    # that buck can find the hipified outputs.
    real_hip_files = [paths.join("c10", f) for f in hip_files]

    # Return the src and output_gen files.
    return gpu_files, real_hip_files
|
||||||
|
|
||||||
|
def get_c10_hip_headers():
    """Collect c10 GPU headers and their hipified output paths.

    Same shape as get_c10_hip_srcs but over gpu_header_extensions.
    """
    patterns = []
    for base in c10_includes:
        for suffix in gpu_header_extensions:
            patterns.append(base + suffix)

    # Exclude the test folder.
    kept = [f for f in native.glob(patterns) if not is_test_files(f)]
    gpu_files = list(kept)
    hip_files = [get_hip_file_path(paths.join("cuda/", f)) for f in kept]

    # Native hip headers keep their names but still flow through both lists.
    native_hip_files = native.glob(
        ["hip/**/*" + suffix for suffix in gpu_header_extensions],
    )
    gpu_files += native_hip_files
    hip_files += native_hip_files

    # hipify runs under the caffe2 folder; prepend "c10" so buck can find
    # the hipified files.
    real_hip_files = [paths.join("c10", f) for f in hip_files]

    # Return the src and output_gen files.
    return gpu_files, real_hip_files
|
||||||
|
|
||||||
|
def get_c10_hip_test_files():
    """Collect c10 GPU *test* sources and their hipified output paths.

    Mirror image of get_c10_hip_srcs: only files under the test folder are
    kept.
    """
    patterns = []
    for base in c10_includes:
        for suffix in gpu_file_extensions:
            patterns.append(base + suffix)

    # Keep only the test folder.
    kept = [f for f in native.glob(patterns) if is_test_files(f)]
    gpu_files = list(kept)
    hip_files = [get_hip_file_path(paths.join("cuda/", f)) for f in kept]

    # NOTE(review): the native hip test glob matches *header* extensions,
    # reproduced as-is from the original — confirm this is intended.
    native_hip_files = native.glob(
        ["hip/test/**/*" + suffix for suffix in gpu_header_extensions],
    )
    gpu_files += native_hip_files
    hip_files += native_hip_files

    # hipify runs under the caffe2 folder; prepend "c10" so buck can find
    # the hipified files.
    real_hip_files = [paths.join("c10", f) for f in hip_files]

    # Return the src and output_gen files.
    return gpu_files, real_hip_files
|
||||||
|
|
||||||
|
c10_includes = ["**/*"]  # glob bases combined with the suffix lists above
|
276
c10/ovrsource_defs.bzl
Normal file
276
c10/ovrsource_defs.bzl
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
|
||||||
|
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library")
|
||||||
|
|
||||||
|
# Platforms on which the CPU-only c10 targets can build.
cpu_supported_platforms = [
    "ovr_config//os:android",
    "ovr_config//os:iphoneos",
    "ovr_config//os:linux-x86_64",
    "ovr_config//os:macos",
    "ovr_config//os:windows-x86_64",
    "ovr_config//runtime:arm64-linux-ubuntu-neon",
]

# Platforms with a CUDA toolchain, used by the c10_cuda targets.
cuda_supported_platforms = [
    "ovr_config//os:linux-cuda",
    "ovr_config//os:windows-cuda",
]
|
||||||
|
|
||||||
|
def define_c10_ovrsource(name, is_mobile):
    """Define one ovrsource c10 static library target.

    Args:
        name: target name to create.
        is_mobile: when True, exports -DC10_MOBILE=1 to dependents.
    """
    if is_mobile:
        pp_flags = ["-DC10_MOBILE=1"]
    else:
        pp_flags = []

    oxx_static_library(
        name = name,
        srcs = native.glob([
            "core/*.cpp",
            "core/impl/*.cpp",
            "mobile/*.cpp",
            "util/*.cpp",
        ]),
        compatible_with = cpu_supported_platforms,
        # Silence warnings on MSVC and clang-on-Windows toolchains only.
        compiler_flags = select({
            "DEFAULT": [],
            "ovr_config//compiler:cl": [
                "/w",
            ],
            "ovr_config//toolchain/clang:win": [
                "-Wno-error",
                "-Wno-shadow",
                "-Wno-undef",
                "-Wno-unused-variable",
            ],
        }),
        include_directories = [".."],
        preprocessor_flags = [
            "-DNO_EXPORT",
            "-DC10_BUILD_MAIN_LIB=1",
            "-DSUPPORTS_BACKTRACE=0",
        ],
        public_include_directories = [".."],
        public_preprocessor_flags = pp_flags,
        public_raw_headers = native.glob([
            "core/*.h",
            "macros/*.h",
            "mobile/*.h",
            "test/util/*.h",  # some external tests use this
            "util/*.h",
        ]),
        raw_headers = native.glob([
            "core/impl/*.h",
        ]),
        reexport_all_header_dependencies = False,
        # tests = C10_CPU_TEST_TARGETS,
        visibility = [
            "//xplat/caffe2/c10:c10_ovrsource",
        ],
        # numactl is only needed (and available) on Linux.
        deps = select({
            "DEFAULT": [],
            "ovr_config//os:linux": [
                "//third-party/numactl:numactl",
            ],
        }),
        exported_deps = [
            ":ovrsource_c10_cmake_macros.h",
            "//arvr/third-party/gflags:gflags",
            "//third-party/glog:glog",
            "//third-party/fmt:fmt",
        ],
    )
|
||||||
|
|
||||||
|
def define_ovrsource_targets():
    """Instantiate all ovrsource c10 targets (cmake headers, CPU and CUDA libs).

    Test scaffolding below is kept commented out, as in the original.
    """
    # C10_CPU_TEST_FILES = native.glob([
    #     "test/core/*.cpp",
    #     "test/util/*.cpp",
    # ])

    # C10_GPU_TEST_FILES = native.glob([
    #     "cuda/test/**/*.cpp",
    # ])

    # C10_CPU_TEST_TARGETS = [
    #     ":" + paths.basename(test)[:-len(".cpp")] + "_ovrsource"
    #     for test in C10_CPU_TEST_FILES
    # ]

    # C10_GPU_TEST_TARGETS = [
    #     ":" + paths.basename(test)[:-len(".cpp")] + "_ovrsource"
    #     for test in C10_GPU_TEST_FILES
    # ]

    # Lines for cmake_macros.h.in shared by mobile and non-mobile variants.
    common_c10_cmake_defines = [
        ("#cmakedefine C10_BUILD_SHARED_LIBS", ""),
        ("#cmakedefine C10_DISABLE_NUMA", ""),
        ("#cmakedefine C10_USE_NUMA", ""),
        ("#cmakedefine C10_USE_MSVC_STATIC_RUNTIME", ""),
    ]

    # Mobile: glog/gflags left undefined.
    mobile_c10_cmake_defines = [
        ("#cmakedefine C10_USE_GLOG", ""),
        ("#cmakedefine C10_USE_GFLAGS", ""),
    ]

    # Non-mobile: glog/gflags enabled.
    non_mobile_c10_cmake_defines = [
        ("#cmakedefine C10_USE_GLOG", "#define C10_USE_GLOG 1"),
        ("#cmakedefine C10_USE_GFLAGS", "#define C10_USE_GFLAGS 1"),
    ]

    gen_cmake_header(
        src = "macros/cmake_macros.h.in",
        defines = common_c10_cmake_defines + mobile_c10_cmake_defines,
        header = "c10/macros/cmake_macros.h",
        prefix = "ovrsource_c10_mobile_",
    )

    gen_cmake_header(
        src = "macros/cmake_macros.h.in",
        defines = common_c10_cmake_defines + non_mobile_c10_cmake_defines,
        header = "c10/macros/cmake_macros.h",
        prefix = "ovrsource_c10_non_mobile_",
    )

    # Selects the mobile or non-mobile generated header per OS.
    oxx_static_library(
        name = "ovrsource_c10_cmake_macros.h",
        compatible_with = [
            "ovr_config//os:android",
            "ovr_config//os:iphoneos",
            "ovr_config//os:linux",
            "ovr_config//os:macos",
            "ovr_config//os:windows",
        ],
        deps = select({
            "ovr_config//os:android": [":ovrsource_c10_mobile_cmake_macros.h"],
            "ovr_config//os:iphoneos": [":ovrsource_c10_mobile_cmake_macros.h"],
            "ovr_config//os:linux": [":ovrsource_c10_non_mobile_cmake_macros.h"],
            "ovr_config//os:macos": [":ovrsource_c10_non_mobile_cmake_macros.h"],
            "ovr_config//os:windows": [":ovrsource_c10_non_mobile_cmake_macros.h"],
        }),
    )

    c10_cuda_macros = gen_cmake_header(
        src = "cuda/impl/cuda_cmake_macros.h.in",
        defines = [
            ("#cmakedefine C10_CUDA_BUILD_SHARED_LIBS", ""),
        ],
        header = "c10/cuda/impl/cuda_cmake_macros.h",
        prefix = "ovrsource",
    )

    # Facade target: picks mobile vs full per OS.
    oxx_static_library(
        name = "c10_ovrsource",
        compatible_with = cpu_supported_platforms,
        exported_deps = select({
            "DEFAULT": [":c10_full_ovrsource"],
            "ovr_config//os:android": [":c10_mobile_ovrsource"],
            "ovr_config//os:iphoneos": [":c10_mobile_ovrsource"],
        }),
        visibility = ["PUBLIC"],
    )

    # NOTE: informational string statement kept as in the original.
    """
    Most users should use c10_ovrsource, not these targets directly.
    """
    define_c10_ovrsource("c10_mobile_ovrsource", True)
    define_c10_ovrsource("c10_full_ovrsource", False)

    oxx_static_library(
        name = "c10_cuda_ovrsource",
        srcs = native.glob([
            "cuda/*.cpp",
            "cuda/impl/*.cpp",
        ]),
        compatible_with = cuda_supported_platforms,
        compiler_flags = select({
            "DEFAULT": [],
            "ovr_config//compiler:cl": [
                "/w",
            ],
            "ovr_config//toolchain/clang:win": [
                "-Wno-error",
                "-Wno-shadow",
                "-Wno-undef",
                "-Wno-unused-variable",
            ],
        }),
        link_whole = True,
        preprocessor_flags = [
            "-DNO_EXPORT",
            "-DC10_CUDA_BUILD_MAIN_LIB=1",
        ],
        raw_headers = native.glob([
            "cuda/*.h",
            "cuda/impl/*.h",
        ]),
        reexport_all_header_dependencies = False,
        # tests = C10_GPU_TEST_TARGETS,
        visibility = ["PUBLIC"],
        deps = [
            "//third-party/cuda:libcuda",
            "//third-party/cuda:libcudart",
        ],
        exported_deps = c10_cuda_macros + [
            ":c10_ovrsource",
        ],
    )

    # [
    #     oxx_test(
    #         name = paths.basename(test)[:-len(".cpp")] + "_ovrsource",
    #         srcs = [test],
    #         compatible_with = cpu_supported_platforms,
    #         compiler_flags = select({
    #             "DEFAULT": [],
    #             "ovr_config//compiler:cl": [
    #                 "/w",
    #             ],
    #             "ovr_config//compiler:clang": [
    #                 "-Wno-error",
    #                 "-Wno-self-assign-overloaded",
    #                 "-Wno-self-move",
    #                 "-Wno-shadow",
    #                 "-Wno-undef",
    #                 "-Wno-unused-function",
    #                 "-Wno-unused-variable",
    #             ],
    #         }),
    #         framework = "gtest",
    #         oncall = "ovrsource_pytorch",
    #         raw_headers = native.glob([
    #             "test/**/*.h",
    #         ]),
    #         deps = [
    #             ":c10_ovrsource",
    #         ],
    #     )
    #     for test in C10_CPU_TEST_FILES
    # ]

    # [
    #     oxx_test(
    #         name = paths.basename(test)[:-len(".cpp")] + "_ovrsource",
    #         srcs = [test],
    #         compatible_with = cuda_supported_platforms,
    #         compiler_flags = select({
    #             "DEFAULT": [],
    #             "ovr_config//compiler:cl": [
    #                 "/w",
    #             ],
    #             "ovr_config//compiler:clang": [
    #                 "-Wno-error",
    #             ],
    #         }),
    #         framework = "gtest",
    #         oncall = "ovrsource_pytorch",
    #         raw_headers = native.glob([
    #             "test/**/*.h",
    #         ]),
    #         runtime_shared_libraries = [
    #             "//third-party/cuda:cudart",
    #         ],
    #         deps = [
    #             ":c10_cuda_ovrsource",
    #         ],
    #     )
    #     for test in C10_GPU_TEST_FILES
    # ]
|
549
c2_defs.bzl
Normal file
549
c2_defs.bzl
Normal file
@ -0,0 +1,549 @@
|
|||||||
|
load("@bazel_skylib//lib:collections.bzl", "collections")
|
||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule")
|
||||||
|
load("@fbsource//tools/build_defs:default_platform_defs.bzl", "compose_platform_setting_list")
|
||||||
|
load("@fbsource//tools/build_defs:dict_defs.bzl", "dict_defs")
|
||||||
|
load("@fbsource//tools/build_defs:expect.bzl", "expect")
|
||||||
|
load("@fbsource//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library")
|
||||||
|
load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
|
||||||
|
load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "CXX", "IOS", "MACOSX", "WINDOWS")
|
||||||
|
load("@fbsource//tools/build_defs/apple:build_mode_defs.bzl", "is_production_build")
|
||||||
|
load("@fbsource//tools/build_defs/apple:config_utils_defs.bzl", "STATIC_LIBRARY_IOS_CONFIG", "STATIC_LIBRARY_MAC_CONFIG", "fbobjc_configs")
|
||||||
|
load("@fbsource//tools/build_defs/apple:focus_config.bzl", "is_focus_enabled")
|
||||||
|
load("@fbsource//xplat/pfh/Msgr/Mobile/ProductInfra:DEFS.bzl", "Msgr_Mobile_ProductInfra")
|
||||||
|
|
||||||
|
def get_c2_expose_op_to_c10():
    """True when caffe2.expose_op_to_c10 is set to "1" (default "0")."""
    raw = native.read_config("caffe2", "expose_op_to_c10", "0")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_mpscnn():
    """True unless caffe2.enable_mpscnn is set to "0" (default "1")."""
    raw = native.read_config("caffe2", "enable_mpscnn", "1")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_mpscnn_test():
    """True when caffe2.enable_mpscnn_test is set to "1" (default "0")."""
    raw = native.read_config("caffe2", "enable_mpscnn_test", "0")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_nomnigraph():
    """True unless caffe2.enable_nomnigraph is set to "0" (default "1")."""
    raw = native.read_config("caffe2", "enable_nomnigraph", "1")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_qpl():
    """True unless caffe2.enable_qpl is set to "0" (default "1")."""
    raw = native.read_config("caffe2", "enable_qpl", "1")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_strip_debug_info():
    """True when caffe2.strip_debug_info is set to "1" (default "0")."""
    raw = native.read_config("caffe2", "strip_debug_info", "0")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_strip_glog():
    """True unless caffe2.strip_glog is set to "0" (default "1")."""
    raw = native.read_config("caffe2", "strip_glog", "1")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
def get_c2_tvm():
    """True unless caffe2.enable_tvm is set to "0" (default "1")."""
    raw = native.read_config("caffe2", "enable_tvm", "1")
    expect(raw in ("0", "1"), raw)
    return raw == "1"
|
||||||
|
|
||||||
|
# Base preprocessor flags for xplat Caffe2 builds without HPTT support;
# extended by get_c2_xplat_no_hptt_preprocessor_flags().
_C2_XPLAT_NO_HPTT_PREPROCESSOR_FLAGS = [
    "-fexceptions",
    "-frtti",
    "-Wno-shadow",
    "-Wno-unknown-pragmas",
    "-Wno-unused-variable",
    "-Wno-sign-compare",
    "-Icaffe2",
    "-Imodules",
    "-DEIGEN_NO_DEBUG",
    "-DCAFFE2_USE_LITE_PROTO",
    "-DCAFFE2_USE_GOOGLE_GLOG",
    "-DCAFFE2_RNN_NO_TEXT_FORMAT",
    "-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK=1",
    "-DCAFFE2_IS_XPLAT_BUILD",
    "-DSTRIP_ERROR_MESSAGES",
    "-DUSE_INTERNAL_PTHREADPOOL_IMPL",
]
|
||||||
|
|
||||||
|
def get_c2_xplat_no_hptt_preprocessor_flags():
    """Preprocessor flags for xplat Caffe2 without HPTT.

    Starts from the static base list, then adds GLOG stripping (arvr mode
    only) and c10 op exposure when the corresponding configs are enabled.
    """
    flags = list(_C2_XPLAT_NO_HPTT_PREPROCESSOR_FLAGS)
    if is_arvr_mode() and get_c2_strip_glog():
        flags.append("-UGOOGLE_STRIP_LOG")
        flags.append("-DGOOGLE_STRIP_LOG=1")
    if get_c2_expose_op_to_c10():
        flags.append("-DEXPOSE_C2_OPS")
        flags.append("-frtti")
    return flags
|
||||||
|
|
||||||
|
# Extra preprocessor flags for server-flavored xplat builds.
C2_XPLAT_SERVER_PREPROCESSOR_FLAGS = [
    "-DCAFFE2_USE_EIGEN_FOR_BLAS",
    "-DC10_DISABLE_SIGNAL_HANDLERS",
    "-DCAFFE2_DISABLE_NUMA",
]

# Added on top of the no-HPTT flags when HPTT is enabled.
C2_XPLAT_HPTT_PREPROCESSOR_FLAGS = [
    "-DCAFFE2_USE_HPTT",
]
|
||||||
|
|
||||||
|
def get_c2_xplat_preprocessor_flags():
    """HPTT-enabled preprocessor flags, plus the optimizer define when
    nomnigraph is enabled."""
    flags = get_c2_xplat_no_hptt_preprocessor_flags() + C2_XPLAT_HPTT_PREPROCESSOR_FLAGS
    if get_c2_nomnigraph():
        flags += ["-DCAFFE2_OPTIMIZER"]
    return flags
|
||||||
|
|
||||||
|
def get_c2_xplat_no_hptt_compiler_flags():
    """Compiler flags (size-optimized) plus the no-HPTT preprocessor flags."""
    flags = ["-Os"]
    flags += get_c2_xplat_no_hptt_preprocessor_flags()
    return flags
|
||||||
|
|
||||||
|
def get_c2_xplat_compiler_flags():
    """No-HPTT compiler flags with the HPTT define appended."""
    flags = get_c2_xplat_no_hptt_compiler_flags()
    return flags + C2_XPLAT_HPTT_PREPROCESSOR_FLAGS
|
||||||
|
|
||||||
|
def get_c2_fbobjc_xplat_compiler_flags():
    """iOS xplat compiler flags: schema/gradient trimming and GLOG stripping."""
    flags = []

    if is_production_build():
        flags.append("-DCAFFE2_NO_OPERATOR_SCHEMA")

    flags.append("-DCAFFE2_NO_GRADIENT_OPS")

    # For iOS production builds (and all Android builds), strip GLOG logging
    # to save size. Disable via caffe2.strip_glog=0 in .buckconfig.local.
    if not (is_production_build() or get_c2_strip_glog()):
        flags.append("-UGOOGLE_STRIP_LOG")
    else:
        flags += ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=3"]

    return flags
|
||||||
|
|
||||||
|
def get_c2_fbandroid_xplat_compiler_flags():
    """Android xplat compiler flags: warning suppressions, GLOG stripping,
    and optional debug-info stripping."""
    flags = [
        # T95767731 -- remove this once all builds are on at least llvm-13
        "-Wno-unknown-warning-option",
        "-Wno-unused-but-set-variable",
    ]

    if get_c2_strip_glog():
        flags.append("-UGOOGLE_STRIP_LOG")
        flags.append("-DGOOGLE_STRIP_LOG=1")

    if get_c2_strip_debug_info():
        flags.append("-g0")

    return flags
|
||||||
|
|
||||||
|
# Base warning suppressions for Apple (fbobjc) Caffe2 builds; copied and
# extended by get_c2_fbobjc_compiler_flags().
_C2_FBOBJC_COMPILER_FLAGS = [
    "-Wno-missing-prototypes",
    "-Wno-global-constructors",
    "-Wno-unknown-pragmas",
    "-Wno-invalid-partial-specialization",
    "-Wno-missing-braces",
    "-Wno-range-loop-analysis",
]
|
||||||
|
|
||||||
|
def get_c2_fbobjc_compiler_flags():
    """Apple compiler flags: base suppressions plus BLAS backend selection
    and an MPSCNN availability suppression."""
    flags = list(_C2_FBOBJC_COMPILER_FLAGS)

    # Avoid linking Accelerate on MacOS because we have
    # inconsistent LAPACK headers (see problems in D19257077).
    if is_arvr_mode():
        flags.append("-DCAFFE2_USE_EIGEN_FOR_BLAS")
    else:
        flags.append("-DCAFFE2_USE_ACCELERATE")

    if get_c2_mpscnn():
        # TODO(t19120552) - fix this. MPSCNNConvolutionDescriptor.strideInPixelsX
        # is marked as iOS 11+, but it's been available since iOS 10.
        flags.append("-Wno-unguarded-availability")
    return flags
|
||||||
|
|
||||||
|
# macOS-only compiler flags.
C2_FBOBJC_MACOSX_COMPILER_FLAGS = [
    "-msse4.2",
]

# iPhone-only compiler flags.
C2_FBOBJC_IPHONE_COMPILER_FLAGS = [
    "-mfpu=neon-fp16",
]
|
||||||
|
|
||||||
|
def get_c2_fbobjc_frameworks():
    """Apple frameworks to link; Accelerate only outside arvr mode."""
    if is_arvr_mode():
        return []
    # On iOS, presumably Accelerate is a faster BLAS.
    return ["$SDKROOT/System/Library/Frameworks/Accelerate.framework"]
|
||||||
|
|
||||||
|
def get_c2_fbobjc_ios_frameworks():
    """iOS-only frameworks; Metal is required when MPSCNN is enabled."""
    if not get_c2_mpscnn():
        return []
    return ["$SDKROOT/System/Library/Frameworks/Metal.framework"]
|
||||||
|
|
||||||
|
def get_c2_fbobjc_linker_flags():
    """Apple linker flags; only needed when MPSCNN is enabled."""
    if not get_c2_mpscnn():
        return []

    # Need linker flags as no platform_frameworks exist, and we can't
    # use MPSCNN on x86_64.
    # We use weak_framework as it's iOS 10
    return [
        "-L$SDKROOT/System/Library/Frameworks/MetalPerformanceShaders.framework",
        "-weak_framework",
        "MetalPerformanceShaders",
    ]
|
||||||
|
|
||||||
|
def get_c2_fbobjc_exported_preprocessor_flags():
    """Exported Apple preprocessor flags driven by the MPSCNN configs."""
    flags = []
    if get_c2_mpscnn():
        flags += ["-DCAFFE2_USE_MPSCNN"]
    if get_c2_mpscnn_test():
        flags += ["-DCAFFE2_USE_MPSCNN_TEST"]
    return flags
|
||||||
|
|
||||||
|
def get_c2_fbandroid_exported_preprocessor_flags():
    """Exported Android preprocessor flags.

    Opt builds drop operator schemas; gradient ops are always dropped.
    """
    build_mode = native.read_config(
        "fbandroid",
        "build_mode",
        "dev",
    )

    flags = []
    if build_mode == "opt":
        flags.append("-DCAFFE2_NO_OPERATOR_SCHEMA")
    flags.append("-DCAFFE2_NO_GRADIENT_OPS")
    return flags
|
||||||
|
|
||||||
|
# Base Android compiler flags (Eigen BLAS + warning suppressions).
C2_FBANDROID_COMPILER_FLAGS = [
    "-DCAFFE2_USE_EIGEN_FOR_BLAS",
    "-Wno-unknown-pragmas",
    "-Wno-deprecated-declarations",
    "-Wno-invalid-partial-specialization",
    "-Wno-missing-braces",
]

# armv7-only flags.
C2_FBANDROID_ARMV7_COMPILER_FLAGS = [
    "-mfpu=neon-fp16",
]

# x86-only flags.
C2_FBANDROID_X86_COMPILER_FLAGS = [
    "-mssse3",
]

# No extra Android linker flags currently.
C2_FBANDROID_LINKER_FLAGS = []

# Extra Xcode target config passed to fbobjc_configs.
C2_FBOBJC_EXTRA_TARGET_CONFIG = {
    "MTL_LANGUAGE_REVISION": "Metal12",
}
|
||||||
|
|
||||||
|
def get_c2_default_cxx_args():
    """Default keyword arguments shared by all c2_cxx_library targets.

    Returned as a plain dict so c2_cxx_library can overlay caller kwargs
    before forwarding to fb_xplat_cxx_library.
    """
    return dict(
        header_namespace = "",
        apple_sdks = (IOS, MACOSX),
        compiler_flags = get_c2_xplat_compiler_flags(),
        fbandroid_compiler_flags = C2_FBANDROID_COMPILER_FLAGS + get_c2_fbandroid_xplat_compiler_flags(),
        fbandroid_exported_platform_preprocessor_flags = [
            (
                "android-armv7",
                get_c2_fbandroid_exported_preprocessor_flags(),
            ),
        ],
        fbandroid_linker_flags = C2_FBANDROID_LINKER_FLAGS,
        # Per-ABI compiler flags, matched by platform regex.
        fbandroid_platform_compiler_flags = [
            ("android-armv7", C2_FBANDROID_ARMV7_COMPILER_FLAGS),
            (".*x86.*", C2_FBANDROID_X86_COMPILER_FLAGS),
        ],
        fbobjc_compiler_flags = get_c2_fbobjc_compiler_flags() + get_c2_fbobjc_xplat_compiler_flags(),
        fbobjc_configs = fbobjc_configs(
            STATIC_LIBRARY_IOS_CONFIG,
            extra_target_config = C2_FBOBJC_EXTRA_TARGET_CONFIG,
        ),
        fbobjc_exported_platform_linker_flags = [
            (
                "iphoneos",
                get_c2_fbobjc_linker_flags(),
            ),
        ],
        fbobjc_exported_platform_preprocessor_flags = [
            (
                "iphoneos",
                get_c2_fbobjc_exported_preprocessor_flags(),
            ),
        ],
        fbobjc_frameworks = get_c2_fbobjc_frameworks() + get_c2_fbobjc_ios_frameworks(),
        fbobjc_platform_compiler_flags = [
            ("iphoneos", C2_FBOBJC_IPHONE_COMPILER_FLAGS),
        ],
        macosx_compiler_flags = C2_FBOBJC_MACOSX_COMPILER_FLAGS,
        fbobjc_macosx_configs_override = fbobjc_configs(
            STATIC_LIBRARY_MAC_CONFIG,
        ),
        macosx_frameworks_override = get_c2_fbobjc_frameworks(),
        preprocessor_flags = [
            # Use the internal pthreadpool impl for all Caffe2 targets on all
            # platforms but do not export the preprocessor flag downstream.
            "-DUSE_INTERNAL_PTHREADPOOL_IMPL",
        ],
        visibility = ["PUBLIC"],
        windows_preferred_linkage = "static" if is_arvr_mode() else None,
        xcode_public_headers_symlinks = True,
    )
|
||||||
|
|
||||||
|
def get_c2_aten_cpu_fbobjc_macosx_deps():
    """Standard (non-platform) macOS deps for aten-cpu.

    focus2 is broken when using platform deps (T80070498), so under focus we
    add fbgemm as a plain dep here; otherwise the platform-deps variant
    (get_c2_aten_cpu_fbobjc_macosx_platform_deps) is used instead.
    """
    if not is_focus_enabled():
        return []
    return [
        "fbsource//xplat/deeplearning/fbgemm:fbgemm",
        "fbsource//xplat/caffe2:cpukernel_avx2",
    ]
|
||||||
|
|
||||||
|
def get_c2_aten_cpu_fbobjc_macosx_platform_deps():
    """Per-(os, cpu) macOS deps for aten-cpu; empty under focus builds."""
    if is_focus_enabled():
        # focus2 is broken when using platform deps (T80070498) so in the case
        # where it's focus2 we just add fbgemm as a standard dep. Otherwise we
        # use platform deps to select correctly for arm64.
        return []
    else:
        return compose_platform_setting_list([
            {
                "cpu": "x86_64",
                "flags": [
                    "fbsource//xplat/deeplearning/fbgemm:fbgemmAppleMac",
                ] + ([
                    "fbsource//xplat/caffe2:cpukernel_avx2AppleMac",
                ] if not is_arvr_mode() else []),
                "os": "macosx",
            },
            {
                "cpu": "arm64",
                "flags": ["fbsource//xplat/third-party/XNNPACK:XNNPACKAppleMac"],
                "os": "macosx",
            },
        ])
|
||||||
|
|
||||||
|
def c2_cxx_library(**kwargs):
    """Declare a Caffe2 cxx library with the shared default arguments.

    Caller kwargs override the defaults from get_c2_default_cxx_args();
    "platforms" falls back to the standard four-platform tuple.
    """
    merged = get_c2_default_cxx_args()
    merged.update(kwargs)
    if "platforms" not in merged:
        merged["platforms"] = (ANDROID, APPLE, CXX, WINDOWS)
    fb_xplat_cxx_library(
        labels = [
            "supermodule:android/default/caffe2",
            "supermodule:ios/default/public.caffe2",
        ],
        feature = Msgr_Mobile_ProductInfra,
        **merged
    )
|
||||||
|
|
||||||
|
def c2_protobuf_rule(protos):
    """Generate lite-runtime C++ protobuf code for each .proto in `protos`.

    Returns a tuple (cpps, headers, raw_headers):
      - cpps: list of ":<name>.pb.cc" genrule targets
      - headers: {"caffe2/proto/<name>.pb.h": target}
      - raw_headers: {"<name>.pb.h": target}
    """
    cc_targets = []
    header_map = {}
    raw_header_map = {}
    for proto_path in protos:
        fname = paths.basename(proto_path)
        if native.host_info().os.is_windows:
            protoc = "$(exe fbsource//third-party/protobuf:protoc-host)" if is_arvr_mode() else "$(location fbsource//xplat/third-party/protobuf:protoc.Windows)"
            gen_cmd = "powershell.exe -file $(location fbsource//xplat/caffe2/scripts:proto)\\proto.ps1 -Protoc {} -Unprocessed $SRCDIR/{} -Processed $SRCDIR/{} -out $OUT -srcdir $SRCDIR".format(protoc, proto_path, fname)
        else:
            # Force the lite runtime on both the proto and its caffe2.proto
            # dependency, flatten the caffe2.proto import path, then run protoc.
            gen_cmd = ("cp $SRCDIR/{} $SRCDIR/{} && chmod +w $SRCDIR/{} && echo \"option optimize_for = LITE_RUNTIME;\" >> $SRCDIR/{} && ".format(proto_path, fname, fname, fname) +
                       "cp $SRCDIR/caffe2/proto/caffe2.proto $SRCDIR/caffe2.proto && chmod +w $SRCDIR/caffe2.proto && echo \"option optimize_for = LITE_RUNTIME;\" >> $SRCDIR/caffe2.proto && " +
                       "sed -i -e 's/caffe2\\/proto\\/caffe2.proto/caffe2.proto/g' $SRCDIR/{} && ".format(fname) +
                       ("$(exe fbsource//third-party/protobuf:protoc-host) " if is_arvr_mode() else "$(exe fbsource//xplat/third-party/protobuf:protoc) --osx $(location fbsource//xplat/third-party/protobuf:protoc.Darwin) --linux $(location fbsource//xplat/third-party/protobuf:protoc.Linux) ") +
                       "-I $SRCDIR --cpp_out=$OUT $SRCDIR/{}".format(fname))
        buck_genrule(
            name = fname,
            srcs = sorted(collections.uniq([proto_path, "caffe2/proto/caffe2.proto"])),
            cmd_exe = gen_cmd,
            bash = gen_cmd,
            out = ".",
        )
        (stem, _) = paths.split_extension(fname)
        cc_name = stem + ".pb.cc"
        h_name = stem + ".pb.h"

        # Copy the generated header out, rewriting the caffe2.pb.h include to
        # its namespaced caffe2/proto/ location.
        buck_genrule(
            name = h_name,
            cmd_exe = "@powershell -Command \" & { " + "(Get-Content $(location :{})\\{}".format(fname, h_name) + ") -replace \\\"caffe2.pb.h\\\", \\\"caffe2/proto/caffe2.pb.h\\\" | Set-Content $OUT } \"",
            bash = "cp -f $(location :{})/{} $OUT && ".format(fname, h_name) +
                   "sed -i -e 's/caffe2.pb.h/caffe2\\/proto\\/caffe2.pb.h/g' $OUT",
            out = h_name,
        )
        header_map["caffe2/proto/" + h_name] = ":{}".format(h_name)
        raw_header_map[h_name] = ":{}".format(h_name)

        # Copy the generated .pb.cc out unchanged.
        buck_genrule(
            name = cc_name,
            cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(fname, cc_name),
            bash = "cp -f $(location :{})/{} $OUT".format(fname, cc_name),
            out = cc_name,
        )
        cc_targets.append(":{}".format(cc_name))
    return (cc_targets, header_map, raw_header_map)
|
||||||
|
|
||||||
|
# C2 uses lite version of protobuf while torch/jit uses some method only exists
|
||||||
|
# in full protobuf. This is a temporary workaround to enable experiment build.
|
||||||
|
# DO NOT USE IT IN PRODUCTION BUILD!
|
||||||
|
def c2_full_protobuf_rule(protos):
    """Full-runtime variant of c2_protobuf_rule (see the note above).

    Identical structure to c2_protobuf_rule except that generated targets are
    prefixed with "full_" and the protos are NOT forced to LITE_RUNTIME.
    """
    prefix = "full_"
    cc_targets = []
    header_map = {}
    raw_header_map = {}
    for proto_path in protos:
        fname = paths.basename(proto_path)
        if native.host_info().os.is_windows:
            protoc = "$(exe fbsource//third-party/protobuf:protoc-host)" if is_arvr_mode() else "$(location fbsource//xplat/third-party/protobuf:protoc.Windows)"
            gen_cmd = "powershell.exe -file $(location fbsource//xplat/caffe2/scripts:proto)\\proto.ps1 -Protoc {} -Unprocessed $SRCDIR/{} -Processed $SRCDIR/{} -out $OUT -srcdir $SRCDIR".format(protoc, proto_path, fname)
        else:
            # Same flat-path rewrite as the lite rule, without the LITE_RUNTIME edits.
            gen_cmd = ("cp $SRCDIR/{} $SRCDIR/{} && ".format(proto_path, fname) +
                       "cp $SRCDIR/caffe2/proto/caffe2.proto $SRCDIR/caffe2.proto && " +
                       "sed -i -e 's/caffe2\\/proto\\/caffe2.proto/caffe2.proto/g' $SRCDIR/{} && ".format(fname) +
                       ("$(exe fbsource//third-party/protobuf:protoc-host) " if is_arvr_mode() else "$(exe fbsource//xplat/third-party/protobuf:protoc) --osx $(location fbsource//xplat/third-party/protobuf:protoc.Darwin) --linux $(location fbsource//xplat/third-party/protobuf:protoc.Linux) ") +
                       "-I $SRCDIR --cpp_out=$OUT $SRCDIR/{}".format(fname))
        buck_genrule(
            name = prefix + fname,
            srcs = sorted(collections.uniq([proto_path, "caffe2/proto/caffe2.proto"])),
            cmd = gen_cmd,
            out = ".",
        )
        (stem, _) = paths.split_extension(fname)
        cc_name = stem + ".pb.cc"
        h_name = stem + ".pb.h"

        # Copy the header out, rewriting the caffe2.pb.h include to its
        # namespaced caffe2/proto/ location.
        buck_genrule(
            name = prefix + h_name,
            cmd_exe = "@powershell -Command \" & { " + "(Get-Content $(location :{})\\{}".format(prefix + fname, h_name) + ") -replace \\\"caffe2.pb.h\\\", \\\"caffe2/proto/caffe2.pb.h\\\" | Set-Content $OUT } \"",
            bash = "cp -f $(location :{})/{} $OUT && ".format(prefix + fname, h_name) +
                   "sed -i -e 's/caffe2.pb.h/caffe2\\/proto\\/caffe2.pb.h/g' $OUT",
            out = h_name,
        )
        header_map["caffe2/proto/" + h_name] = ":{}".format(prefix + h_name)
        raw_header_map[h_name] = ":{}".format(prefix + h_name)

        # Copy the generated .pb.cc out unchanged.
        buck_genrule(
            name = prefix + cc_name,
            cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(prefix + fname, cc_name),
            bash = "cp -f $(location :{})/{} $OUT".format(prefix + fname, cc_name),
            out = cc_name,
        )
        cc_targets.append(":{}".format(prefix + cc_name))
    return (cc_targets, header_map, raw_header_map)
|
||||||
|
|
||||||
|
def libcaffe2_cxx_library(name, use_hptt, **kwargs):
    """Define the core caffe2 library target.

    Args:
        name: target name.
        use_hptt: when True, use the HPTT-enabled compiler/preprocessor flag
            sets; otherwise the no-hptt variants.
    """
    c2_cxx_library(
        name = name,
        compiler_flags = get_c2_xplat_compiler_flags() if use_hptt else get_c2_xplat_no_hptt_compiler_flags(),
        exported_preprocessor_flags = get_c2_xplat_preprocessor_flags() if use_hptt else get_c2_xplat_no_hptt_preprocessor_flags(),
        cxx_preprocessor_flags = C2_XPLAT_SERVER_PREPROCESSOR_FLAGS,
        fbandroid_exported_preprocessor_flags = get_c2_fbandroid_xplat_compiler_flags(),
        fbobjc_exported_preprocessor_flags = get_c2_fbobjc_xplat_compiler_flags(),
        exported_deps = [
            "fbsource//xplat/caffe2/c10:c10",
            "fbsource//third-party/protobuf:libprotobuf" if is_arvr_mode() else "fbsource//xplat/third-party/protobuf:fb-protobuf-lite",
            ":caffe2_protobuf_headers",
            ":pthreadpool",
            ":common_core",
            ":caffe2_proto_types",
        ],
        # Hack to work around lack of platform_srcs support in Xcode project generation.
        macosx_extra_xcode_sources_override = [],
        link_whole = True,
        **kwargs
    )
|
||||||
|
|
||||||
|
def c2_operator_library(name, **kwargs):
    """Define a caffe2 operator library, built size-optimized and link_whole."""
    dict_defs.key_extend(
        kwargs,
        "deps",
        [
            "fbsource//xplat/folly:molly",
            "fbsource//third-party/glog:glog",
            ":caffe2",
        ] + ([":aten_cpu"] if get_c2_expose_op_to_c10() else []),
    )

    # NOTE: Currently operators can "depend" on other operators, which is used
    # so that loading one will implicitly load the dependencies. So, make sure
    # that no `--as-needed` flags pulled in from dependencies cause these
    # operator deps to get dropped.
    no_as_needed = [
        "-Wl,--no-as-needed",
    ]
    defaults = get_c2_default_cxx_args()
    c2_cxx_library(
        name = name,
        soname = "lib" + name + ".$(ext)",
        fbandroid_compiler_flags = defaults["fbandroid_compiler_flags"] + ["-Os"],
        fbobjc_compiler_flags = defaults["fbobjc_compiler_flags"] + ["-Oz", "-DCOMPILING_FOR_MIN_SIZE=1"],
        link_whole = True,
        cxx_exported_linker_flags = no_as_needed,
        fbandroid_exported_linker_flags = no_as_needed,
        exported_deps = [
            ":caffe2",
        ],
        **kwargs
    )
|
||||||
|
|
||||||
|
def c2_genrule(genrule, genfiles, prefix = "", src_path = "", header_namespace = ""):
    """Wrap each file produced by `genrule` in its own single-file copy rule.

    Returns {"headers": {header_namespace + name: target}, "srcs": [targets]}.
    """
    header_map = {}
    src_targets = []
    for gen_name in genfiles:
        buck_genrule(
            name = prefix + gen_name,
            bash = "cp -f $(location :{})/{} $OUT".format(genrule, src_path + gen_name),
            cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(genrule, src_path + gen_name),
            out = gen_name,
        )
        target = ":{}{}".format(prefix, gen_name)
        header_map[header_namespace + gen_name] = target
        src_targets.append(target)
    return {"headers": header_map, "srcs": src_targets}
|
20
c2_test_defs.bzl
Normal file
20
c2_test_defs.bzl
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
load("@fbsource//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test")
|
||||||
|
load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "CXX", "IOS", "MACOSX")
|
||||||
|
load("@fbsource//xplat/caffe2:c2_defs.bzl", "get_c2_default_cxx_args")
|
||||||
|
|
||||||
|
def c2_cxx_test(**kwargs):
    """Define a caffe2 cross-platform C++ test with the c2 default arguments."""
    args = get_c2_default_cxx_args()
    args.update(kwargs)
    args["fbandroid_use_instrumentation_test"] = True

    # Strip macOS-specific overrides before forwarding (presumably not
    # accepted by fb_xplat_cxx_test — TODO confirm).
    unsupported = (
        "macosx_compiler_flags",
        "fbobjc_macosx_configs_override",
        "macosx_frameworks_override",
        "xcode_public_headers_symlinks",
        "macosx_inherited_buck_flags_override",
    )
    for key in unsupported:
        args.pop(key, None)
    args["apple_sdks"] = (IOS, MACOSX)
    args["platforms"] = (CXX, APPLE, ANDROID)
    args["contacts"] = ["oncall+ai_infra_mobile_platform@xmail.facebook.com"]
    fb_xplat_cxx_test(**args)
|
23
caffe2/BUILD_MODE.bzl
Normal file
23
caffe2/BUILD_MODE.bzl
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
""" build mode definitions for caffe2/caffe2 """
|
||||||
|
|
||||||
|
load("@fbcode//:BUILD_MODE.bzl", get_parent_modes = "all_modes_keep_gpu_sections_all_modes_use_lld")
|
||||||
|
load("@fbcode_macros//build_defs:create_build_mode.bzl", "extend_build_mode")
|
||||||
|
|
||||||
|
def update_mode_struct(name, mode_struct):
    """Customize a parent build mode; only the "dev" mode is changed."""
    if name != "dev":
        return mode_struct
    return extend_build_mode(
        mode_struct,
        # TODO(ipbrady): Modules introduce floating point inaccuracies (T43879333)
        cxx_modules = False,
    )
|
||||||
|
|
||||||
|
# Derive this file's modes from the parent fbcode modes, applying the
# per-mode customization above.
_modes = {
    mode: update_mode_struct(mode, parent_struct)
    for mode, parent_struct in get_parent_modes().items()
}
|
||||||
|
|
||||||
|
def get_modes():
    """Return the build modes defined for this file."""
    return _modes
|
89
caffe2/defs.bzl
Normal file
89
caffe2/defs.bzl
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# useful command for debugging which files are included:
|
||||||
|
# buck targets caffe2/caffe2: --json | jq -r "map(select(.srcs)) | map({key: .name, value: .srcs | sort}) | from_entries"
|
||||||
|
load("@fbsource//tools/build_defs:type_defs.bzl", "is_list")
|
||||||
|
load("//tools/build/buck:flags.bzl", "get_flags")
|
||||||
|
|
||||||
|
flags = get_flags()

# Glob patterns (relative to caffe2/caffe2) for the default source set.
# Keep entries unique: every entry is expanded per-extension by
# get_patterns(), so a duplicate entry yields duplicate glob patterns.
# (A duplicated "observers/*" entry was removed from both tuples.)
_BASE_PATHS = (
    "core/*",
    "core/boxing/*",
    "core/boxing/impl/*",
    "core/dispatch/*",
    "core/op_registration/*",
    "cuda_rtc/*",
    "db/*",
    "experiments/operators/*",
    "ideep/**/*",
    "observers/*",
    "onnx/**/*",
    "operators/**/*",
    "predictor/*",
    "queue/*",
    "sgd/*",
    "share/contrib/zstd/*",
    "transforms/*",
    "utils/**/*",
)

# SGX build source set: same as _BASE_PATHS but without ideep/ and with
# serialize/* added.
_BASE_SGX_PATHS = (
    "core/*",
    "core/boxing/*",
    "core/boxing/impl/*",
    "core/dispatch/*",
    "core/op_registration/*",
    "cuda_rtc/*",
    "db/*",
    "experiments/operators/*",
    "observers/*",
    "onnx/**/*",
    "operators/**/*",
    "predictor/*",
    "queue/*",
    "sgd/*",
    "serialize/*",
    "share/contrib/zstd/*",
    "transforms/*",
    "utils/**/*",
)
|
||||||
|
|
||||||
|
def get_sgx_patterns(ext):
    """Expand each SGX base path with each extension in `ext` (str or list)."""
    exts = ext if is_list(ext) else [ext]
    return [base + e for base in _BASE_SGX_PATHS for e in exts]
|
||||||
|
|
||||||
|
def get_patterns(ext):
    """Expand each base path with each extension in `ext` (str or list)."""
    exts = ext if is_list(ext) else [ext]
    return [base + e for base in _BASE_PATHS for e in exts]
|
||||||
|
|
||||||
|
def get_simd_preprocessor_flags():
    """Preprocessor defines shared by all SIMD compilation variants."""
    return [
        "-DUSE_FBGEMM",
    ]
|
||||||
|
|
||||||
|
def get_simd_compiler_flags():
    """SIMD codegen flags chosen from the configured instruction-set flags."""
    if flags.USE_SSE_ONLY:
        return ["-mno-avx"]

    simd_flags = ["-mavx"] + get_simd_preprocessor_flags()

    # Every uarch with AVX512 support has AVX2 support
    if flags.USE_AVX2 or flags.USE_AVX512:
        simd_flags += ["-mavx2", "-mfma"]
    if flags.USE_AVX512:
        simd_flags += ["-mavx512f", "-mavx512dq", "-mavx512vl"]
    return simd_flags
|
149
caffe2/defs_hip.bzl
Normal file
149
caffe2/defs_hip.bzl
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load(
|
||||||
|
"//caffe2:defs_hip.bzl",
|
||||||
|
"caffe2_includes",
|
||||||
|
"caffe2_video_image_includes",
|
||||||
|
"get_hip_file_path",
|
||||||
|
)
|
||||||
|
|
||||||
|
# File extensions treated as GPU sources / headers when globbing.
gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"]
gpu_header_extensions = [".cuh", ".h", ".hpp"]
|
||||||
|
|
||||||
|
def is_caffe2_gpu_file(filepath):
    """Return True when `filepath` is a CUDA-specific file (a hipify candidate)."""

    # those files are needed since they define placeholders
    if "/native/cudnn/" in filepath:
        return True

    # files that are already compatible with hip
    if "/hip/" in filepath:
        return False

    # exclude all cudnn and nvrtc implementations except for nvrtc_stub
    if "/nvrtc_stub/" in filepath:
        return True
    if any([keyword in filepath for keyword in ("cudnn", "nvrtc", "NVRTC")]):
        return False

    if "/cuda/" in filepath:
        return True

    base = paths.basename(filepath)
    _, ext = paths.split_extension(base)
    return "gpu" in base or ext in [".cu", ".cuh"]
|
||||||
|
|
||||||
|
def get_caffe2_hip_srcs(
        include_patterns = caffe2_includes,
        include_files = [],
        project_dir = "caffe2"):
    """Collect GPU sources and their hipified output paths.

    Returns (gpu_files, real_hip_files): the original source paths and the
    corresponding hipified paths prefixed with `project_dir`.
    """
    gpu_patterns = [
        base + suffix
        for base in include_patterns
        for suffix in gpu_file_extensions
    ]
    candidates = native.glob(gpu_patterns) + include_files

    gpu_files = []
    hip_files = []
    for path in candidates:
        # exclude test files
        if "_test" in paths.basename(path) or not is_caffe2_gpu_file(path):
            continue
        gpu_files.append(path)
        hip_files.append(get_hip_file_path(path, is_caffe2 = True))

    # there will be some native hip files that needs suffix changed
    native_hip_files = native.glob([
        base[:-1] + "hip/*.hip"
        for base in include_patterns
    ])
    gpu_files += native_hip_files
    hip_files += native_hip_files

    # we run hipify script under the caffe2 folder; therefore we need to
    # prepend caffe2 to the path so that buck can find the hipified file
    real_hip_files = [paths.join(project_dir, f) for f in hip_files]

    # return the src and output_gen files
    return gpu_files, real_hip_files
|
||||||
|
|
||||||
|
def get_caffe2_hip_headers(
        include_patterns = caffe2_includes,
        include_files = [],
        project_dir = "caffe2"):
    """Collect GPU headers and their hipified output paths.

    Returns (header_files, real_hip_headers): the original header paths and
    the corresponding hipified paths prefixed with `project_dir`.
    """
    header_patterns = [
        base + suffix
        for base in include_patterns
        for suffix in gpu_header_extensions
    ]
    candidates = native.glob(header_patterns) + include_files

    header_files = []
    hip_headers = []
    for path in candidates:
        # exclude test files
        # if the caller directly specifies files via include_files, follow it
        if path not in include_files and ("_test" in paths.basename(path) or not is_caffe2_gpu_file(path)):
            continue
        header_files.append(path)
        hip_headers.append(get_hip_file_path(path, is_caffe2 = True))

    # we run hipify script under the caffe2 folder; therefore we need to
    # prepend caffe2 to the path so that buck can find the hipified file
    real_hip_headers = [paths.join(project_dir, f) for f in hip_headers]

    # return the src and output_gen files
    return header_files, real_hip_headers
|
||||||
|
|
||||||
|
def get_caffe2_hip_video_image_srcs():
    """Hipified sources for the video/image subset of caffe2."""
    return get_caffe2_hip_srcs(include_patterns = caffe2_video_image_includes)
|
||||||
|
|
||||||
|
def get_caffe2_hip_video_image_headers():
    """Hipified headers for the video/image subset of caffe2."""
    return get_caffe2_hip_headers(include_patterns = caffe2_video_image_includes)
|
||||||
|
|
||||||
|
def get_caffe2_hip_test_files():
    """Collect GPU test sources and their hipified output paths."""
    test_includes = [
        "**/*_gpu_test.cc",
    ]

    # let's ignores the mpi test and fb-internal tests for now
    test_ignores = [
        "mpi/mpi_gpu_test.cc",
        # "operators/roi_align_op_gpu_test.cc",
        "**/fb/**/*_gpu_test.cc",
    ]

    test_files = []
    hip_test_files = []
    for path in native.glob(test_includes, exclude = test_ignores):
        if not is_caffe2_gpu_file(path):
            continue
        test_files.append(path)
        hip_test_files.append(get_hip_file_path(path, is_caffe2 = True))

    # we run hipify script under the caffe2 folder; therefore we need to
    # prepend caffe2 to the path so that buck can find the hipified file
    real_hip_test_files = [paths.join("caffe2", f) for f in hip_test_files]

    # return the src and output_gen files
    return test_files, real_hip_test_files
|
89
defs.bzl
Normal file
89
defs.bzl
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
def get_sleef_deps():
    """sleef dependency; omitted on aarch64 hosts."""
    if host_info().arch.is_aarch64:
        return []
    return [("sleef", None, "sleef")]
|
||||||
|
|
||||||
|
def get_blas_gomp_deps():
    """BLAS + OpenMP deps by host arch: MKL on x86_64, OpenBLAS/omp on aarch64."""
    arch = host_info().arch
    if arch.is_x86_64:
        mkl_flavor = native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp")
        return [("IntelComposerXE", None, mkl_flavor)]
    if arch.is_aarch64:
        return [
            ("OpenBLAS", None, "OpenBLAS"),
            ("openmp", None, "omp"),
        ]
    fail("Unsupported architecture")
|
||||||
|
|
||||||
|
# Warning suppressions and feature defines shared by the aten/caffe2 fbcode
# builds. Order is preserved; do not reorder flags.
default_compiler_flags = [
    "-Wall",
    "-Wextra",
    "-Wno-unused-function",
    "-Wno-unused-parameter",
    "-Wno-error=strict-aliasing",
    "-Wno-unused-local-typedefs",
    "-Wno-shadow-compatible-local",
    "-Wno-maybe-uninitialized",  # aten is built with gcc as part of HHVM
    "-Wno-unknown-pragmas",
    "-Wno-strict-overflow",
    # See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/
    # These trigger on platform007
    "-Wno-stringop-overflow",
    "-Wno-class-memaccess",
    "-DHAVE_MMAP",
    "-DUSE_GCC_ATOMICS=1",
    "-D_FILE_OFFSET_BITS=64",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DTH_HAVE_THREAD",
    "-DCPU_CAPABILITY_DEFAULT",
    "-DTH_INDEX_BASE=0",
    "-DMAGMA_V2",
    "-DNO_CUDNN_DESTROY_HANDLE",
    "-DUSE_FBGEMM",
    "-DUSE_QNNPACK",
    "-DUSE_PYTORCH_QNNPACK",
    # The dynamically loaded NVRTC trick doesn't work in fbcode,
    # and it's not necessary anyway, because we have a stub
    # nvrtc library which we load canonically anyway
    "-DUSE_DIRECT_NVRTC",
    "-DUSE_RUY_QMATMUL",
] + ([] if native.host_info().os.is_windows else [
    # XNNPACK depends on an updated version of pthreadpool interface, whose implementation
    # includes <pthread.h> - a header not available on Windows.
    "-DUSE_XNNPACK",
]) + (["-O1"] if native.read_config("fbcode", "build_mode_test_label", "") == "dev-nosan" else [])

# Extra suppressions needed only under a specific compiler.
compiler_specific_flags = {
    "clang": [
        "-Wno-absolute-value",
        "-Wno-pass-failed",
        "-Wno-braced-scalar-init",
    ],
    "gcc": [
        "-Wno-error=array-bounds",
    ],
}
|
||||||
|
|
||||||
|
def get_cpu_parallel_backend_flags():
    """Defines selecting the ATen CPU parallel backend from buckconfig."""
    backend = native.read_config("pytorch", "parallel_backend", "openmp")
    backend_defines = {
        "openmp": "-DAT_PARALLEL_OPENMP_FBCODE=1",
        "tbb": "-DAT_PARALLEL_NATIVE_TBB_FBCODE=1",
        "native": "-DAT_PARALLEL_NATIVE_FBCODE=1",
    }
    if backend not in backend_defines:
        fail("Unsupported parallel backend: " + backend)
    defs = [backend_defines[backend]]
    if native.read_config("pytorch", "exp_single_thread_pool", "0") == "1":
        defs.append("-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
    if native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp") == "mkl_lp64_seq":
        defs.append("-DATEN_MKL_SEQUENTIAL_FBCODE=1")
    return defs
|
||||||
|
|
||||||
|
def is_cpu_static_dispatch_build():
    """True when fbcode is configured for CPU static dispatch."""
    return native.read_config("fbcode", "caffe2_static_dispatch_mode", "none") == "cpu"
|
166
defs_gpu.bzl
Normal file
166
defs_gpu.bzl
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule")
|
||||||
|
load(
|
||||||
|
"//caffe2/caffe2:defs_hip.bzl",
|
||||||
|
"get_caffe2_hip_headers",
|
||||||
|
"get_caffe2_hip_srcs",
|
||||||
|
)
|
||||||
|
load(":ufunc_defs.bzl", "aten_ufunc_names")
|
||||||
|
|
||||||
|
# Glob patterns partitioning the ATen CUDA/cuDNN/MIOpen tree by kind
# (headers vs. host .cpp vs. device .cu). Consumed by the getters below.
ATEN_CUDA_H_PATTERN = [
    "aten/src/ATen/cuda/*.h",
    "aten/src/ATen/cuda/detail/*.h",
    "aten/src/ATen/cuda/nvrtc_stub/*.h",
    "aten/src/ATen/cuda/*.cuh",
    "aten/src/ATen/cuda/detail/*.cuh",
]

ATEN_CUDA_CPP_PATTERN = [
    "aten/src/ATen/cuda/*.cpp",
    "aten/src/ATen/cuda/detail/*.cpp",
    "aten/src/ATen/cuda/nvrtc_stub/*.cpp",
]

ATEN_CUDA_CU_PATTERN = [
    "aten/src/ATen/cuda/*.cu",
    "aten/src/ATen/cuda/detail/*.cu",
]

ATEN_CUDNN_H_PATTERN = [
    "aten/src/ATen/cudnn/*.h",
    "aten/src/ATen/cudnn/*.cuh",
]

ATEN_CUDNN_CPP_PATTERN = ["aten/src/ATen/cudnn/*.cpp"]

ATEN_MIOPEN_H_PATTERN = [
    "aten/src/ATen/miopen/*.h",
    "aten/src/ATen/miopen/*.cuh",
]

ATEN_MIOPEN_CPP_PATTERN = ["aten/src/ATen/miopen/*.cpp"]

ATEN_NATIVE_CUDNN_CPP_PATTERN = ["aten/src/ATen/native/cudnn/*.cpp"]

ATEN_NATIVE_MIOPEN_CPP_PATTERN = ["aten/src/ATen/native/miopen/*.cpp"]

ATEN_NATIVE_CUDA_CU_PATTERN = [
    "aten/src/ATen/native/cuda/*.cu",
    "aten/src/ATen/native/nested/cuda/*.cu",
    "aten/src/ATen/native/quantized/cuda/*.cu",
    "aten/src/ATen/native/sparse/cuda/*.cu",
    "aten/src/ATen/native/transformers/**/*.cu",
]

ATEN_NATIVE_CUDA_CPP_PATTERN = [
    "aten/src/ATen/native/cuda/*.cpp",
    "aten/src/ATen/native/cuda/linalg/*.cpp",
    "aten/src/ATen/native/nested/cuda/*.cpp",
    "aten/src/ATen/native/sparse/cuda/*.cpp",
    "aten/src/ATen/native/transformers/cuda/*.cpp",
]

ATEN_NATIVE_CUDA_H_PATTERN = [
    "aten/src/ATen/native/cudnn/**/*.h",
    "aten/src/ATen/native/cuda/**/*.h",
    "aten/src/ATen/native/cuda/**/*.cuh",
    "aten/src/ATen/native/sparse/cuda/*.h",
    "aten/src/ATen/native/sparse/cuda/*.cuh",
    "aten/src/ATen/native/quantized/cuda/*.h",
    "aten/src/ATen/native/transformers/cuda/*.h",
    "aten/src/ATen/native/transformers/**/*.cuh",
]

# T66678203: Clang CUDA rollout
ATEN_CUDA_CLANG_CU_PATTERN = [
    "aten/src/ATen/native/cuda/DistributionBernoulli.cu",
]
|
||||||
|
|
||||||
|
### Cuda Files
|
||||||
|
def get_aten_cuda_headers():
    """All ATen CUDA-related headers: cuda, native cuda, and cudnn."""
    return (native.glob(ATEN_CUDA_H_PATTERN) +
            native.glob(ATEN_NATIVE_CUDA_H_PATTERN) +
            native.glob(ATEN_CUDNN_H_PATTERN))
|
||||||
|
|
||||||
|
def get_aten_cuda_srcs():
    """CUDA .cu sources, excluding the clang-compiled files (see ATEN_CUDA_CLANG_CU_PATTERN)."""
    native_cu = native.glob(
        ATEN_NATIVE_CUDA_CU_PATTERN,
        exclude = ATEN_CUDA_CLANG_CU_PATTERN,
    )
    return native.glob(ATEN_CUDA_CU_PATTERN) + native_cu
|
||||||
|
|
||||||
|
def get_aten_cuda_clang_srcs():
    """The .cu files compiled with clang (T66678203: Clang CUDA rollout)."""
    return native.glob(ATEN_CUDA_CLANG_CU_PATTERN)
|
||||||
|
|
||||||
|
# CPU+CUDA file
|
||||||
|
# Note that these sources and headers include the CPU lists too
|
||||||
|
def get_all_cuda_srcs():
    """All CUDA-related sources: host .cpp files plus the .cu set from get_aten_cuda_srcs()."""
    host_cpps = (native.glob(ATEN_NATIVE_CUDNN_CPP_PATTERN) +
                 native.glob(ATEN_CUDNN_CPP_PATTERN) +
                 native.glob(ATEN_NATIVE_MIOPEN_CPP_PATTERN) +
                 native.glob(ATEN_CUDA_CPP_PATTERN) +
                 native.glob(ATEN_NATIVE_CUDA_CPP_PATTERN))
    return host_cpps + get_aten_cuda_srcs()
|
||||||
|
|
||||||
|
### HIP files
|
||||||
|
# Files that must be hipified
|
||||||
|
def get_aten_hip_srcs():
    """Sources that must be hipified; returns (original paths, hipified paths).

    HIP does not use clang, so ATEN_CUDA_CLANG_CU_PATTERN is NOT excluded here.
    The "aten/src/" prefix is stripped from the hipified paths.
    """
    srcs = (native.glob(ATEN_CUDA_CU_PATTERN) +
            native.glob(ATEN_NATIVE_CUDA_CU_PATTERN) +
            native.glob(ATEN_NATIVE_CUDNN_CPP_PATTERN) +
            native.glob(ATEN_CUDNN_CPP_PATTERN) +
            native.glob(ATEN_CUDA_CPP_PATTERN) +
            native.glob(ATEN_NATIVE_CUDA_CPP_PATTERN))

    # Get hipified file names (before, after)
    ret = get_caffe2_hip_srcs(include_patterns = [], include_files = srcs, project_dir = "")
    return (ret[0], [f.replace("aten/src/", "") for f in ret[1]])
|
||||||
|
|
||||||
|
def get_aten_hip_headers():
    """Headers that must be hipified; returns (original paths, hipified paths).

    The "aten/src/" prefix is stripped from the hipified paths.
    """
    ATEN_CUDA_H = native.glob(ATEN_CUDA_H_PATTERN)
    ATEN_NATIVE_CUDA_H = native.glob(ATEN_NATIVE_CUDA_H_PATTERN)
    ATEN_CUDNN_H = []  # native.glob(ATEN_CUDNN_H_PATTERN)

    # Get hipified file names (before, after). `srcs` was previously computed
    # but unused (the concatenation was rebuilt inline in the call); pass it
    # instead so the list is only built once.
    srcs = ATEN_CUDA_H + ATEN_NATIVE_CUDA_H + ATEN_CUDNN_H
    ret = get_caffe2_hip_headers(include_patterns = [], include_files = srcs, project_dir = "")
    return ret[0], [f.replace("aten/src/", "") for f in ret[1]]
|
||||||
|
|
||||||
|
# Native HIP-aware files
|
||||||
|
def get_aten_hip_native_srcs():
    """HIP-aware sources that need no hipify: hip impl plus MIOpen code."""
    return (native.glob(["aten/src/ATen/hip/impl/*.cpp"]) +
            native.glob(ATEN_MIOPEN_CPP_PATTERN) +
            native.glob(ATEN_NATIVE_MIOPEN_CPP_PATTERN))
|
||||||
|
|
||||||
|
def get_aten_hip_native_headers():
    """HIP-aware headers that need no hipify: hip impl plus MIOpen headers."""
    return (native.glob(["aten/src/ATen/hip/impl/*.h"]) +
            native.glob(ATEN_MIOPEN_H_PATTERN))
|
||||||
|
|
||||||
|
def get_aten_hip_ufunc_generated_cuda_sources(gencode_pattern = "{}"):
    """Rename generated ufunc CUDA files from ".cu" to ".hip".

    Contents of these CUDA files do not need to be hipified at this point,
    but they must be renamed from ".cu" to ".hip" because, unlike OSS, a
    compiler is selected based on a file extension.
    """
    renamed_targets = []
    for ufunc in aten_ufunc_names:
        src_name = "UfuncCUDA_{}.cu".format(ufunc)
        rule_name = "aten_ufunc_hip_renamed_{}".format(ufunc)
        buck_genrule(
            name = rule_name,
            srcs = [gencode_pattern.format(src_name)],
            bash = 'cp "$SRCDIR/{}" "$OUT"'.format(src_name),
            out = "UfuncCUDA_{}.hip".format(ufunc),
            default_outs = [],
        )
        renamed_targets.append(":" + rule_name)
    return renamed_targets
|
136
defs_hip.bzl
Normal file
136
defs_hip.bzl
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load("@fbcode//tools/build/buck:rocm_flags.bzl", "get_rocm_arch_args")
|
||||||
|
|
||||||
|
# Glob bases (relative to caffe2/) whose GPU files are subject to hipify.
caffe2_includes = [
    "operators/**/*",
    "operators/*",
    "sgd/*",
    "transforms/*",
    # distributed folder is managed by its own TARGETS file
    # "distributed/*",
    "queue/*",
    # "binaries/*",
    "**/*_test*",
    "core/*",
    "db/*",
    "utils/**/*",
]

# Glob bases for the optional video/image operator subset.
caffe2_video_image_includes = [
    "image/*",
    "video/*",
]

# ATen/torch glob bases containing CUDA code.
pytorch_includes = [
    "aten/src/ATen/cuda/*",
    "aten/src/ATen/native/cuda/*",
    "aten/src/ATen/native/cuda/linalg/*",
    "aten/src/ATen/native/cudnn/*",
    "aten/src/ATen/native/nested/cuda/*",
    "aten/src/ATen/native/sparse/cuda/*",
    "aten/src/ATen/native/transformers/cuda/*",
    "aten/src/THC/*",
    "aten/src/ATen/test/*",
    "torch/*",
]

# File extensions treated as GPU sources / headers when globbing.
gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"]
gpu_header_extensions = [".cuh", ".h", ".hpp"]

# External ROCm runtime/library deps for HIP targets.
hip_external_deps = [
    ("rocm", None, "amdhip64-lazy"),
    ("rocm", None, "MIOpen-lazy"),
    ("rocm", None, "rccl-lazy"),
    ("rocm", None, "roctracer64-lazy"),
]

# Preprocessor flags applied to all HIP compilation.
hip_pp_flags = [
    # HIP 4.4.21432 -> TORCH_HIP_VERSION=404
    "-DTORCH_HIP_VERSION=(FB_HIP_VERSION/100000)",
    # ROCm 4.5.2 -> ROCM_VERSION=40502
    "-DROCM_VERSION=FB_ROCM_VERSION",
    "-DUSE_ROCM=1",
    "-D__HIP_PLATFORM_HCC__=1",
    "-D__HIP_NO_HALF_OPERATORS__=1",
    "-D__HIP_NO_HALF_CONVERSIONS__=1",
    "-DCUDA_HAS_FP16=1",
    "-DCAFFE2_USE_MIOPEN",
    # The c10/cuda/impl/cuda_cmake_macros.h is not generated for the
    # hip build yet.
    "-DC10_HIP_NO_CMAKE_CONFIGURE_FILE",
    # clang with -fopenmp=libgomp (gcc's OpenMP runtime library) produces
    # single threaded code and doesn't define -D_OPENMP by default.
    # clang with -fopenmp or -fopenmp=libomp (llvm's OpenMP runtime library)
    # produces multi-threaded code and defines -D_OPENMP by default.
    #
    # hcc currently don't have llvm openmp runtime project builtin.
    # wrap_hip.py also drops -D_OPENMP if explicitly specified.
    "-U_OPENMP",
]
|
||||||
|
|
||||||
|
def get_hip_flags():
    """Compiler flags for HIP/ROCm targets.

    NDEBUG is forced off, a fixed set of clang warnings triggered by
    hipified sources is suppressed, and the configured ROCm architecture
    flags are appended.
    """

    # Warnings that hipified CUDA sources are known to trip; silence them
    # rather than failing the build.
    warning_suppressions = [
        "-Wno-error=absolute-value",
        "-Wno-macro-redefined",
        "-Wno-inconsistent-missing-override",
        "-Wno-exceptions",
        "-Wno-shift-count-negative",
        "-Wno-shift-count-overflow",
        "-Wno-duplicate-decl-specifier",
        "-Wno-implicit-int-float-conversion",
        "-Wno-unused-result",
        "-Wno-pass-failed",
        "-Wno-unknown-pragmas",
        "-Wno-cuda-compat",
    ]

    # Caffe2 cannot be compiled with NDEBUG using ROCm 4.5.2.
    # TODO: The issue should be fixed properly.
    return ["-UNDEBUG"] + warning_suppressions + get_rocm_arch_args()
|
||||||
|
|
||||||
|
def get_hip_file_path(filepath, is_caffe2 = False):
    """Map a CUDA source path to the path its hipified counterpart will use.

    this function should be in sync with the hipified script in
    third-party/hipify_torch/hipify/hipify_python.py
    unfortunately because it's a normal python (instead of Starlark)
    we cannot simply import from there

    The general rule of converting file names from cuda to hip is:
    - If there is a directory component named "cuda", replace
    it with "hip", AND

    - If the file name contains "CUDA", replace it with "HIP", AND

    If NONE of the above occurred, then insert "hip" in the file path
    as the direct parent folder of the file

    Furthermore, ALWAYS replace '.cu' with '.hip', because those files
    contain CUDA kernels that needs to be hipified and processed with
    hcc compile

    Args:
        filepath: repo-relative path of a CUDA source/header file.
        is_caffe2: True when hipifying a caffe2 source tree (enables the
            THCCachingAllocator special case below).

    Returns:
        The repo-relative path of the hipified file.
    """
    dirpath = paths.dirname(filepath)
    filename = paths.basename(filepath)
    filename, ext = paths.split_extension(filename)

    # CUDA kernel sources always become .hip files.
    if ext == ".cu":
        ext = ".hip"

    # Remember the pre-rewrite directory so we can detect "nothing changed".
    orig_dirpath = dirpath

    dirpath = dirpath.replace("cuda", "hip")
    dirpath = dirpath.replace("THC", "THH")

    # Lowercase rewrite first, then uppercase; each handles a distinct casing.
    filename = filename.replace("cuda", "hip")
    filename = filename.replace("CUDA", "HIP")

    # Special case to handle caffe2/core/THCCachingAllocator
    if not (is_caffe2 and dirpath == "core"):
        filename = filename.replace("THC", "THH")

    # if the path doesn't change (e.g., path doesn't include "cuda" so we
    # cannot differentiate), insert "hip" as the direct parent folder
    # special case for utils/cub_namespace, because it is first used and hipified when used
    # from core, it doesn't end up in hip directory
    if dirpath == orig_dirpath and not filename == "cub_namespace":
        dirpath = paths.join(dirpath, "hip")

    return paths.join(dirpath, filename + ext)
|
10
ios/METADATA.bzl
Normal file
10
ios/METADATA.bzl
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN
# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL
# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF
# THIS PACKAGE.
# TPMS-GENERATED: b832a8f526016b30c557d8a58fc89d9338a51cff
# Package metadata record: name, owning oncall, and pinned version.
METADATA = {
    "name": "LibTorch",
    "owner": "ai_infra_mobile_platform",
    "version": "1.11.0",
}
|
10
ios/TestApp/METADATA.bzl
Normal file
10
ios/TestApp/METADATA.bzl
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN
# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL
# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF
# THIS PACKAGE.
# TPMS-GENERATED: ba55575493b7ad21fde900f05f93c501b2715a09
# Package metadata record: name, owning oncall, and pinned version.
METADATA = {
    "name": "unf_ext",
    "owner": "ai_infra_mobile_platform",
    "version": "0.0.7.6",
}
|
83
ovrsource_aten_gen_defs.bzl
Normal file
83
ovrsource_aten_gen_defs.bzl
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
# @nolint
|
||||||
|
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
|
||||||
|
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library")
|
||||||
|
load(
|
||||||
|
"@fbsource//xplat/caffe2:pt_defs.bzl",
|
||||||
|
"gen_aten_files",
|
||||||
|
"get_aten_codegen_extra_params",
|
||||||
|
)
|
||||||
|
|
||||||
|
def define_aten_gen():
    """Define the ATen code-generation targets for the ovrsource build."""

    # ATen dispatch-key backends the code generator is run for
    # (CPU + CUDA families, quantized, meta, zero-tensor).
    backends = [
        "CPU",
        "SparseCPU",
        "SparseCsrCPU",
        # "MkldnnCPU",
        "CUDA",
        "SparseCUDA",
        "SparseCsrCUDA",
        "QuantizedCPU",
        "QuantizedCUDA",
        "Meta",
        "ZeroTensor"
    ]

    gen_aten_files(
        name = "gen_aten_ovrsource",
        extra_flags = get_aten_codegen_extra_params(backends),
        visibility = ["PUBLIC"],
    )

    # Expose only the generated CUDA dispatch headers under the ATen
    # header namespace.
    oxx_static_library(
        name = "ovrsource_aten_generated_cuda_headers",
        header_namespace = "ATen",
        public_generated_headers = {
            "CUDAFunctions.h": ":gen_aten_ovrsource[CUDAFunctions.h]",
            "CUDAFunctions_inl.h": ":gen_aten_ovrsource[CUDAFunctions_inl.h]",
        },
        visibility = ["PUBLIC"],
    )

    # Likewise for the Meta backend headers.
    oxx_static_library(
        name = "ovrsource_aten_generated_meta_headers",
        header_namespace = "ATen",
        public_generated_headers = {
            "MetaFunctions.h": ":gen_aten_ovrsource[MetaFunctions.h]",
            "MetaFunctions_inl.h": ":gen_aten_ovrsource[MetaFunctions_inl.h]",
        },
        visibility = ["PUBLIC"],
    )

    # Materialize ATen/Config.h from its CMake template with values fixed
    # for this build (no MKL/MKLDNN/NNPACK/FFTW; native parallel backend).
    gen_cmake_header(
        src = "aten/src/ATen/Config.h.in",
        defines = [
            ("@AT_MKLDNN_ENABLED@", "0"),
            ("@AT_MKL_ENABLED@", "0"),
            ("@AT_MKL_SEQUENTIAL@", "0"),
            ("@AT_FFTW_ENABLED@", "0"),
            ("@AT_NNPACK_ENABLED@", "0"),
            ("@AT_PARALLEL_OPENMP@", "0"),
            ("@AT_PARALLEL_NATIVE@", "1"),
            ("@AT_PARALLEL_NATIVE_TBB@", "0"),
            ("@AT_POCKETFFT_ENABLED@", "0"),
            ("@CAFFE2_STATIC_LINK_CUDA_INT@", "1"),
            ("@AT_BUILD_WITH_BLAS@", "1"),
            ("@AT_BUILD_WITH_LAPACK@", "1"),
            ("@AT_BLAS_F2C@", "1"),
            ("@AT_BLAS_USE_CBLAS_DOT@", "0")
        ],
        header = "ATen/Config.h",
        prefix = "ovrsource_aten_",
    )

    # CUDA config header: cuDNN on, ROCm/MAGMA off.
    gen_cmake_header(
        src = "aten/src/ATen/cuda/CUDAConfig.h.in",
        defines = [
            ("@AT_CUDNN_ENABLED@", "1"),
            ("@AT_ROCM_ENABLED@", "0"),
            ("@NVCC_FLAGS_EXTRA@", " "),
            ("@AT_MAGMA_ENABLED@", "0")
        ],
        header = "ATen/cuda/CUDAConfig.h",
        prefix = "ovrsource_aten_",
    )
|
87
ovrsource_caffe2_perfkernels_defs.bzl
Normal file
87
ovrsource_caffe2_perfkernels_defs.bzl
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# @nolint
|
||||||
|
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library")
|
||||||
|
load("@fbsource//xplat/caffe2/c10:ovrsource_defs.bzl", "cpu_supported_platforms")
|
||||||
|
|
||||||
|
def define_caffe2_perfkernels():
    """Define the caffe2 perfkernels libraries: one per x86 SIMD tier plus
    an aggregating generic library that links the SIMD variants in."""

    # One static library per SIMD level, built only for x86_64, each
    # compiling just the matching *_<arch>.cc sources with that arch's
    # instruction-set flags.
    [
        oxx_static_library(
            name = "perfkernels_{}_ovrsource".format(arch),
            srcs = native.glob(["caffe2/perfkernels/*_{}.cc".format(arch)]),
            compatible_with = ["ovr_config//cpu:x86_64"],
            compiler_flags = select({
                "DEFAULT": [],
                "ovr_config//compiler:cl": [
                    "/arch:AVX2",
                    "/w",
                ],
                # NOTE(review): "-mf16c" appears both here and again in the
                # "avx" branch below — presumably a harmless duplication;
                # confirm it is intentional.
                "ovr_config//compiler:clang": [
                    "-Wno-error",
                    "-mf16c",
                ] + (["-mf16c", "-mavx"] if arch == "avx" else ["-mfma", "-mavx2"] if arch == "avx2" else ["-mavx512f"]),
            }),
            raw_headers = native.glob([
                "caffe2/core/*.h",
                "caffe2/perfkernels/*.h",
                "caffe2/proto/*.h",
                "caffe2/utils/*.h",
            ], exclude = [
                "caffe2/core/macros.h",
            ]),
            reexport_all_header_dependencies = False,
            deps = [
                ":caffe2_proto_ovrsource",
                ":ovrsource_caffe2_macros.h",
                "@fbsource//xplat/caffe2/c10:c10_ovrsource",
            ],
        )
        for arch in ["avx", "avx2", "avx512"]
    ]

    # Generic (non-SIMD) perfkernels sources; on x86_64 this pulls in the
    # avx and avx2 variants above.
    # NOTE(review): the avx512 variant is built but not listed in the
    # x86_64 deps here — verify that is intentional.
    oxx_static_library(
        name = "perfkernels_ovrsource",
        srcs = native.glob([
            "caffe2/perfkernels/*.cc",
        ], exclude = [
            "**/*_avx*",
        ]),
        compatible_with = cpu_supported_platforms,
        compiler_flags = select({
            "DEFAULT": [],
            "ovr_config//compiler:cl": [
                "/w",
            ],
            "ovr_config//compiler:clang": [
                "-Wno-macro-redefined",
                "-Wno-shadow",
                "-Wno-undef",
                "-Wno-unused-function",
                "-Wno-unused-local-typedef",
                "-Wno-unused-variable",
            ],
        }),
        public_include_directories = [],
        public_raw_headers = native.glob([
            "caffe2/perfkernels/*.h",
        ]),
        raw_headers = native.glob([
            "caffe2/core/*.h",
            "caffe2/proto/*.h",
            "caffe2/utils/*.h",
        ], exclude = [
            "caffe2/core/macros.h",
        ]),
        reexport_all_header_dependencies = False,
        deps = [
            ":caffe2_proto_ovrsource",
            ":ovrsource_caffe2_macros.h",
            "//third-party/cpuinfo:cpuinfo",
            "@fbsource//xplat/caffe2/c10:c10_ovrsource",
            "//third-party/protobuf:libprotobuf",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_64": [
                ":perfkernels_avx_ovrsource",
                ":perfkernels_avx2_ovrsource",
            ],
        }),
    )
|
20
ovrsource_caffe2_proto_defs.bzl
Normal file
20
ovrsource_caffe2_proto_defs.bzl
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# @nolint
|
||||||
|
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library", "oxx_test")
|
||||||
|
load("//arvr/tools/build_defs:oxx_python.bzl", "oxx_python_binary", "oxx_python_library")
|
||||||
|
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
|
||||||
|
load("//arvr/tools/build_defs:protobuf.bzl", "proto_cxx_library")
|
||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
|
||||||
|
def define_caffe2_proto():
    """Define the C++ library generated from the caffe2 protobuf schemas."""
    proto_cxx_library(
        name = "caffe2_proto_ovrsource",
        protos = [
            "caffe2/proto/caffe2.proto",
            "caffe2/proto/caffe2_legacy.proto",
            "caffe2/proto/hsm.proto",
            "caffe2/proto/metanet.proto",
            "caffe2/proto/predictor_consts.proto",
            "caffe2/proto/prof_dag.proto",
            "caffe2/proto/torch.proto",
        ],
    )
|
101
ovrsource_nomnigraph_defs.bzl
Normal file
101
ovrsource_nomnigraph_defs.bzl
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# @nolint
|
||||||
|
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library", "oxx_test")
|
||||||
|
load("//arvr/tools/build_defs:oxx_python.bzl", "oxx_python_binary", "oxx_python_library")
|
||||||
|
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
|
||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
|
||||||
|
def define_nomnigraph():
    """Define nomnigraph codegen, library, and per-file unit-test targets."""

    # Python binary that generates operator definitions from ops.def.
    oxx_python_binary(
        name = "nomnigraph_gen_py_ovrsource",
        main_module = "caffe2.core.nomnigraph.op_gen",
        deps = [":nomnigraph_gen_py_main_ovrsource"],
    )

    oxx_python_library(
        name = "nomnigraph_gen_py_main_ovrsource",
        srcs = native.glob(["caffe2/core/nomnigraph/*.py"]),
        base_module = "",
    )

    # Arguments passed to the generator binary in the genrule below.
    nomnigraph_gen_py_cmd = " ".join([
        "--install_dir=$OUT",
        "--source_def=caffe2/core/nomnigraph/ops.def",
        # "--source_def=caffe2/core/nomnigraph/fb/ops.def",
    ])

    native.genrule(
        name = "nomnigraph_gen_ovrsource",
        srcs = [
            # "caffe2/core/nomnigraph/fb/ops.def",
            "caffe2/core/nomnigraph/op_gen.py",
            "caffe2/core/nomnigraph/ops.def",
        ],
        cmd_exe = "mkdir $OUT && $(exe :nomnigraph_gen_py_ovrsource) " + nomnigraph_gen_py_cmd,
        out = "gen",
    )

    TEST_SRCS = native.glob([
        "caffe2/core/nomnigraph/tests/*.cc",
    ], exclude = [
        "caffe2/core/nomnigraph/tests/GraphTest.cc",  # fails because debug iterator check
    ])

    oxx_static_library(
        name = "nomnigraph_ovrsource",
        srcs = [
            "caffe2/core/nomnigraph/Representations/NeuralNet.cc",
        ],
        compiler_flags = select({
            "ovr_config//compiler:clang": [
                "-Wno-undef",
                "-Wno-shadow",
                "-Wno-macro-redefined",
                "-Wno-unused-variable",
                "-Wno-unused-local-typedef",
                "-Wno-unused-function",
            ],
            "DEFAULT": [],
        }),
        public_include_directories = ["caffe2/core/nomnigraph/include"],
        public_raw_headers = native.glob([
            "caffe2/core/nomnigraph/include/**/*.h",
        ]),
        raw_headers = ["caffe2/core/common.h"],
        reexport_all_header_dependencies = False,
        # One test target per test source file, named after the file stem.
        tests = [
            ":" + paths.basename(filename)[:-len(".cc")] + "_ovrsource"
            for filename in TEST_SRCS
        ],
        deps = [
            ":ovrsource_caffe2_macros.h",
            "@fbsource//xplat/caffe2/c10:c10_ovrsource",
        ],
    )

    # The per-file gtest targets referenced by `tests` above.
    [
        oxx_test(
            name = paths.basename(filename)[:-len(".cc")] + "_ovrsource",
            srcs = [
                filename,
                "caffe2/core/nomnigraph/tests/test_util.cc",
            ],
            compiler_flags = select({
                "ovr_config//compiler:clang": [
                    "-Wno-macro-redefined",
                    "-Wno-shadow",
                    "-Wno-undef",
                    "-Wno-unused-variable",
                ],
                "DEFAULT": [],
            }),
            framework = "gtest",
            oncall = "frl_gemini",
            raw_headers = native.glob([
                "caffe2/core/nomnigraph/tests/*.h",
            ]),
            deps = [
                ":nomnigraph_ovrsource",
            ],
        )
        for filename in TEST_SRCS
    ]
|
239
pt_template_srcs.bzl
Normal file
239
pt_template_srcs.bzl
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
# This file keeps a list of PyTorch source files that are used for templated selective build.
|
||||||
|
# NB: as this is PyTorch Edge selective build, we assume only CPU targets are
|
||||||
|
# being built
|
||||||
|
|
||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
|
||||||
|
load(":build_variables.bzl", "aten_native_source_list")
|
||||||
|
load(
|
||||||
|
":ufunc_defs.bzl",
|
||||||
|
"aten_ufunc_generated_cpu_kernel_sources",
|
||||||
|
"aten_ufunc_generated_cpu_sources",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Files in this list are supposed to be built separately for each app,
# for different operator allow lists.
# (register_prim_ops/register_special_ops plus every ATen native source.)
TEMPLATE_SOURCE_LIST = [
    "torch/csrc/jit/runtime/register_prim_ops.cpp",
    "torch/csrc/jit/runtime/register_special_ops.cpp",
] + aten_native_source_list
|
||||||
|
|
||||||
|
# For selective build, we can lump the CPU and CPU kernel sources altogether
# because there is only ever one vectorization variant that is compiled
def aten_ufunc_generated_all_cpu_sources(gencode_pattern = "{}"):
    """Return the generated CPU ufunc sources followed by the CPU kernel
    sources, each formatted through *gencode_pattern*."""
    plain_sources = aten_ufunc_generated_cpu_sources(gencode_pattern)
    kernel_sources = aten_ufunc_generated_cpu_kernel_sources(gencode_pattern)
    return plain_sources + kernel_sources
|
||||||
|
|
||||||
|
# Additional per-app templated registration sources for the Mask R-CNN
# custom ops and the batch box-cox ops.
TEMPLATE_MASKRCNN_SOURCE_LIST = [
    "register_maskrcnn_ops.cpp",
]

TEMPLATE_BATCH_BOX_COX_SOURCE_LIST = [
    "register_batch_box_cox_ops.cpp",
]
|
||||||
|
|
||||||
|
# Metal (iOS GPU) backend sources: core runtime (.mm/.cpp), MPSCNN
# wrappers, and per-op implementations under ops/.
METAL_SOURCE_LIST = [
    "aten/src/ATen/native/metal/MetalAten.mm",
    "aten/src/ATen/native/metal/MetalGuardImpl.cpp",
    "aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp",
    "aten/src/ATen/native/metal/MetalCommandBuffer.mm",
    "aten/src/ATen/native/metal/MetalContext.mm",
    "aten/src/ATen/native/metal/MetalConvParams.mm",
    "aten/src/ATen/native/metal/MetalTensorImplStorage.mm",
    "aten/src/ATen/native/metal/MetalTensorUtils.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNClampOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNConvOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNFullyConnectedOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNNeuronOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNUtils.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSImage+Tensor.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSImageUtils.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.mm",
    "aten/src/ATen/native/metal/ops/MetalAddmm.mm",
    "aten/src/ATen/native/metal/ops/MetalBinaryElementwise.mm",
    "aten/src/ATen/native/metal/ops/MetalChunk.mm",
    "aten/src/ATen/native/metal/ops/MetalClamp.mm",
    "aten/src/ATen/native/metal/ops/MetalConcat.mm",
    "aten/src/ATen/native/metal/ops/MetalConvolution.mm",
    "aten/src/ATen/native/metal/ops/MetalCopy.mm",
    "aten/src/ATen/native/metal/ops/MetalHardswish.mm",
    "aten/src/ATen/native/metal/ops/MetalLeakyReLU.mm",
    "aten/src/ATen/native/metal/ops/MetalNeurons.mm",
    "aten/src/ATen/native/metal/ops/MetalPadding.mm",
    "aten/src/ATen/native/metal/ops/MetalPooling.mm",
    "aten/src/ATen/native/metal/ops/MetalReduce.mm",
    "aten/src/ATen/native/metal/ops/MetalReshape.mm",
    "aten/src/ATen/native/metal/ops/MetalSoftmax.mm",
    "aten/src/ATen/native/metal/ops/MetalTranspose.mm",
    "aten/src/ATen/native/metal/ops/MetalUpsamplingNearest.mm",
]
|
||||||
|
|
||||||
|
# App-local Metal sources: UNet prepack ops and Mask R-CNN Metal kernels.
UNET_METAL_PREPACK_SOURCE_LIST = [
    "unet_metal_prepack.cpp",
    "unet_metal_prepack.mm",
]

METAL_MASKRCNN_SOURCE_LIST = [
    "maskrcnn/srcs/GenerateProposals.mm",
    "maskrcnn/srcs/RoIAlign.mm",
]
|
||||||
|
|
||||||
|
# The get_template_source_dict() returns a dict containing a path prefix
# and a list of .cpp source files containing operator definitions and
# registrations that should get selected via templated selective build.
# The file selected_mobile_ops.h has the list of selected top level
# operators.
# NB: doesn't include generated files; copy_template_registration_files
# handles those specially
def get_template_source_dict():
    """Group TEMPLATE_SOURCE_LIST by directory: {dir_prefix: [files...]}."""
    grouped = {}
    for source_path in TEMPLATE_SOURCE_LIST:
        grouped.setdefault(paths.dirname(source_path), []).append(source_path)
    return grouped
|
||||||
|
|
||||||
|
def get_gen_oplist_outs():
    """Named outputs of the gen_oplist rule; each output name maps to the
    single file it produces."""
    return {
        out: [out]
        for out in [
            "SupportedMobileModelsRegistration.cpp",
            "selected_mobile_ops.h",
            "selected_operators.yaml",
        ]
    }
|
||||||
|
|
||||||
|
def get_generate_code_bin_outs():
    """Named outputs of the autograd code generator.

    Always includes the C++ autograd outputs; in arvr mode the Python
    binding sources are generated as well. Every output name maps to the
    single file it produces.
    """
    cpp_outputs = [
        "autograd/generated/ADInplaceOrViewTypeEverything.cpp",
        "autograd/generated/ADInplaceOrViewType_0.cpp",
        "autograd/generated/ADInplaceOrViewType_1.cpp",
        "autograd/generated/Functions.cpp",
        "autograd/generated/Functions.h",
        "autograd/generated/TraceTypeEverything.cpp",
        "autograd/generated/TraceType_0.cpp",
        "autograd/generated/TraceType_1.cpp",
        "autograd/generated/TraceType_2.cpp",
        "autograd/generated/TraceType_3.cpp",
        "autograd/generated/TraceType_4.cpp",
        "autograd/generated/VariableType.h",
        "autograd/generated/VariableTypeEverything.cpp",
        "autograd/generated/VariableType_0.cpp",
        "autograd/generated/VariableType_1.cpp",
        "autograd/generated/VariableType_2.cpp",
        "autograd/generated/VariableType_3.cpp",
        "autograd/generated/VariableType_4.cpp",
        "autograd/generated/variable_factories.h",
    ]
    outs = {path: [path] for path in cpp_outputs}

    if is_arvr_mode():
        python_outputs = [
            "autograd/generated/python_fft_functions.cpp",
            "autograd/generated/python_functions.h",
            "autograd/generated/python_functions_0.cpp",
            "autograd/generated/python_functions_1.cpp",
            "autograd/generated/python_functions_2.cpp",
            "autograd/generated/python_functions_3.cpp",
            "autograd/generated/python_functions_4.cpp",
            "autograd/generated/python_linalg_functions.cpp",
            "autograd/generated/python_nn_functions.cpp",
            "autograd/generated/python_return_types.cpp",
            "autograd/generated/python_sparse_functions.cpp",
            "autograd/generated/python_special_functions.cpp",
            "autograd/generated/python_torch_functions_0.cpp",
            "autograd/generated/python_torch_functions_1.cpp",
            "autograd/generated/python_torch_functions_2.cpp",
            "autograd/generated/python_variable_methods.cpp",
        ]
        outs.update({path: [path] for path in python_outputs})
    return outs
|
||||||
|
|
||||||
|
def get_template_registration_files_outs():
    """Named outputs for the template-registration copy rule: every
    templated source (maskrcnn, batch box-cox, core template list) plus the
    generated ufunc CPU sources under aten/src/ATen/."""
    outs = {}
    template_paths = (
        TEMPLATE_MASKRCNN_SOURCE_LIST +
        TEMPLATE_BATCH_BOX_COX_SOURCE_LIST +
        TEMPLATE_SOURCE_LIST
    )
    for path in template_paths:
        outs[path] = [path]

    for base_name in aten_ufunc_generated_all_cpu_sources():
        generated_path = "aten/src/ATen/{}".format(base_name)
        outs[generated_path] = [generated_path]

    return outs
|
||||||
|
|
||||||
|
def get_template_registration_file_rules(rule_name):
    """Return ":rule_name[output]" labels for every templated registration
    source and every generated ufunc CPU source."""
    template_paths = (
        TEMPLATE_SOURCE_LIST +
        TEMPLATE_MASKRCNN_SOURCE_LIST +
        TEMPLATE_BATCH_BOX_COX_SOURCE_LIST
    )
    rules = [":{}[{}]".format(rule_name, path) for path in template_paths]
    rules += [
        ":{}[aten/src/ATen/{}]".format(rule_name, path)
        for path in aten_ufunc_generated_all_cpu_sources()
    ]
    return rules
|
||||||
|
|
||||||
|
# ---------------------METAL RULES---------------------
def get_metal_source_dict():
    """Group METAL_SOURCE_LIST by directory: {dir_prefix: [files...]}."""
    grouped = {}
    for source_path in METAL_SOURCE_LIST:
        grouped.setdefault(paths.dirname(source_path), []).append(source_path)
    return grouped
|
||||||
|
|
||||||
|
def get_metal_registration_files_outs():
    """Named outputs for the Metal registration copy rule: core Metal
    sources, UNet prepack sources, and Mask R-CNN Metal sources."""
    all_paths = (
        METAL_SOURCE_LIST +
        UNET_METAL_PREPACK_SOURCE_LIST +
        METAL_MASKRCNN_SOURCE_LIST
    )
    return {path: [path] for path in all_paths}
|
||||||
|
|
||||||
|
# There is a really weird issue with the arvr windows builds where
# the custom op files are breaking them. See https://fburl.com/za87443c
# The hack is just to not build them for that platform and pray they arent needed.
def get_metal_registration_files_outs_windows():
    """Windows variant of get_metal_registration_files_outs: only the core
    Metal sources, skipping the custom-op source lists."""
    return {path: [path] for path in METAL_SOURCE_LIST}
|
||||||
|
|
||||||
|
def get_metal_registration_files_rules(rule_name):
    """Partition all Metal registration outputs of *rule_name* into
    {"objc": [...], "cxx": [...]} labels, keyed on whether the source path
    contains ".cpp"."""
    all_paths = (
        METAL_SOURCE_LIST +
        METAL_MASKRCNN_SOURCE_LIST +
        UNET_METAL_PREPACK_SOURCE_LIST
    )
    return {
        "objc": [
            ":{}[{}]".format(rule_name, path)
            for path in all_paths
            if ".cpp" not in path
        ],
        "cxx": [
            ":{}[{}]".format(rule_name, path)
            for path in all_paths
            if ".cpp" in path
        ],
    }
|
||||||
|
|
||||||
|
def get_metal_registration_files_rules_windows(rule_name):
    """Windows variant of get_metal_registration_files_rules: partitions
    only the core Metal sources into {"objc": [...], "cxx": [...]}."""
    return {
        "objc": [
            ":{}[{}]".format(rule_name, path)
            for path in METAL_SOURCE_LIST
            if ".cpp" not in path
        ],
        "cxx": [
            ":{}[{}]".format(rule_name, path)
            for path in METAL_SOURCE_LIST
            if ".cpp" in path
        ],
    }
|
112
test/defs.bzl
Normal file
112
test/defs.bzl
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
load("@fbcode_macros//build_defs:python_pytest.bzl", "python_pytest")
|
||||||
|
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
|
||||||
|
load("@fbsource//tools/build_defs/sandcastle:sandcastle_defs.bzl", "is_sandcastle_machine")
|
||||||
|
|
||||||
|
def define_python_unittest(pytest = False, **kwargs):
    """Define a python test target with the PyTorch test environment wired in.

    Derives PYTORCH_TEST_WITH_* env vars from the fbcode build mode,
    copies the caller's env/tags before mutating them, adds GPU
    remote-execution tags for *cuda targets, and dispatches to
    python_pytest or python_unittest based on `pytest`.
    """
    build_mode = native.read_config("fbcode", "build_mode_test_label")
    enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))

    PYTORCH_TEST_WITH_ASAN = "1" if ("asan" in build_mode or build_mode == "dev") else "0"

    PYTORCH_TEST_WITH_DEV_DBG_ASAN = "1" if (build_mode == "dev" or "dev-asan" in build_mode or "dbg-asan" in build_mode or "dbgo-asan" in build_mode) else "0"

    PYTORCH_TEST_WITH_TSAN = "1" if ("tsan" in build_mode) else "0"

    PYTORCH_TEST_WITH_UBSAN = "1" if ("ubsan" in build_mode or build_mode == "dev") else "0"

    NO_MULTIPROCESSING_SPAWN = "1" if is_sandcastle_machine() else "0"

    ENABLE_FLATBUFFER = "1" if enable_flatbuffer else "0"

    # indicates we are running in test env.
    # "deepcopy" the 'env: Dict[str, str]' so we never mutate the caller's dict
    kwargs["env"] = dict(kwargs.get("env", {}))
    kwargs["env"]["PYTORCH_TEST"] = "1"
    kwargs["env"]["PYTORCH_TEST_FBCODE"] = "1"
    kwargs["env"]["PYTORCH_TEST_WITH_ASAN"] = PYTORCH_TEST_WITH_ASAN
    kwargs["env"]["PYTORCH_TEST_WITH_DEV_DBG_ASAN"] = PYTORCH_TEST_WITH_DEV_DBG_ASAN
    kwargs["env"]["PYTORCH_TEST_WITH_TSAN"] = PYTORCH_TEST_WITH_TSAN
    kwargs["env"]["PYTORCH_TEST_WITH_UBSAN"] = PYTORCH_TEST_WITH_UBSAN
    kwargs["env"]["NO_MULTIPROCESSING_SPAWN"] = NO_MULTIPROCESSING_SPAWN
    kwargs["env"]["ENABLE_FLATBUFFER"] = ENABLE_FLATBUFFER

    # To speed up TP tests.
    kwargs["env"]["TENSORPIPE_TLS_DATACENTER"] = "test_dc"

    # Run CUDA tests on GPUs
    # NOTE(review): assumes every caller passes a `name` kwarg — `.endswith`
    # on a missing name would fail; confirm all call sites supply it.
    if kwargs.get("name").endswith("cuda"):
        # "deepcopy" the 'tags: List[str]'
        kwargs["tags"] = list(kwargs.get("tags", []))
        kwargs["tags"].extend([
            "re_opts_capabilities={\"platform\": \"gpu-remote-execution\", \"subplatform\": \"P100\"}",
            "supports_remote_execution",
            "run_as_bundle",
            "tpx:experimental-shard-size-for-bundle=100",
        ])
        kwargs["env"]["PYTORCH_TEST_REMOTE_GPU"] = "1"

    if pytest:
        python_pytest(
            **kwargs
        )
    else:
        python_unittest(
            **kwargs
        )
|
||||||
|
|
||||||
|
def define_mp_tests(tests, additional_deps = None, pytest = False, **kwargs):
    """Define multiprocessing-based tests: disables LeakSanitizer, works
    around kineto TSAN flakiness, and serializes test cases per target.

    Args:
        tests: dict of target name -> list of test srcs.
        additional_deps: optional dict of target name -> extra deps.
        pytest: use python_pytest instead of python_unittest.
        **kwargs: forwarded to define_tests.
    """

    # LeakSanitizer doesn't work for python multiprocessing.
    # See https://fb.workplace.com/groups/fbcode/posts/2625521060818050/
    # and https://fb.workplace.com/groups/101100140348621/posts/1278688645923092/
    extra_env = {
        "ASAN_OPTIONS": "detect_leaks=0",
        "CUDA_INJECTION64_PATH": "0",  # resolve kineto TSAN flakiness
    }

    # Serialize test cases since multiple tests running on same GPUs can
    # deadlock or there can be port conflicts.
    # "deepcopy" the 'tags: List[str]' so we don't mutate the caller's list
    # (same pattern as define_python_unittest).
    kwargs["tags"] = list(kwargs.get("tags", []))
    if "serialize_test_cases" not in kwargs["tags"]:
        kwargs["tags"].append("serialize_test_cases")
    define_tests(tests, additional_deps, pytest, extra_env, **kwargs)
|
||||||
|
|
||||||
|
def define_q_distributed_test(tests, env = None, additional_deps = None, pytest = False, **kwargs):
    """Thin wrapper over define_tests that forwards the caller-supplied
    env as the extra environment."""
    define_tests(
        tests,
        additional_deps = additional_deps,
        pytest = pytest,
        extra_env = env,
        **kwargs
    )
|
||||||
|
|
||||||
|
def define_tests(tests, additional_deps = None, pytest = False, extra_env = {}, **kwargs):
    """Define one python test target per entry in `tests`.

    Args:
        tests: dict of target name -> list of test srcs.
        additional_deps: optional dict of target name -> extra deps for
            just that target.
        pytest: use python_pytest instead of python_unittest.
        extra_env: env vars merged over the defaults below.
        **kwargs: forwarded to define_python_unittest.
    """
    if additional_deps == None:
        additional_deps = {}

    provided_tags = kwargs.pop("tags", [])

    # Baseline test environment; single-threaded BLAS/OpenMP keeps runs
    # deterministic, extra_env entries override these defaults.
    env = {
        "DOCS_SRC_DIR": "$(location //caffe2/docs/source:doc_files)",
        "MKL_NUM_THREADS": "1",
        "OMP_NUM_THREADS": "1",
        "SKIP_TEST_BOTTLENECK": "1",
    }
    env.update(extra_env)
    for name, srcs in tests.items():
        # Fresh copy per target so one target's tag list isn't shared.
        tags = list(provided_tags)

        test_deps = ["//caffe2:test-lib"] + additional_deps.get(name, [])
        define_python_unittest(
            pytest,
            name = name,
            srcs = srcs,
            base_module = "",
            compile = "with-source",
            env = env,
            py_version = ">=3.5",
            strip_libpar = True,
            tags = tags,
            deps = test_deps,
            # Depend directly on :libtorch so that tests won't be pruned by the
            # rdep distance heuristic.
            cpp_deps = ["//caffe2:libtorch"],
            runtime_deps = [
                "//caffe2/docs/source:doc_files",
            ],
            **kwargs
        )
|
39
test/distributed/defs.bzl
Normal file
39
test/distributed/defs.bzl
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
load("@fbsource//tools/build_defs:testpilot_defs.bzl", "special_tags")
|
||||||
|
load(
|
||||||
|
"//caffe2/test:defs.bzl",
|
||||||
|
"define_python_unittest",
|
||||||
|
)
|
||||||
|
|
||||||
|
# These distributed tests need custom environment variables
|
||||||
|
def define_distributed_test(**kwargs):
|
||||||
|
# LeakSanitizer doesn't work for python multiprocessing.
|
||||||
|
# See https://fb.workplace.com/groups/fbcode/posts/2625521060818050/
|
||||||
|
# and https://fb.workplace.com/groups/101100140348621/posts/1278688645923092/
|
||||||
|
kwargs["env"]["ASAN_OPTIONS"] = "detect_leaks=0"
|
||||||
|
|
||||||
|
# Resolve kineto TSAN flakiness
|
||||||
|
kwargs["env"]["CUDA_INJECTION64_PATH"] = "0"
|
||||||
|
define_python_unittest(
|
||||||
|
base_module = "",
|
||||||
|
main_module = "fb.test_distributed_trap",
|
||||||
|
py_version = ">=3.5",
|
||||||
|
tags = [special_tags.run_as_bundle],
|
||||||
|
deps = [
|
||||||
|
"//caffe2:test-lib",
|
||||||
|
"//caffe2:torch",
|
||||||
|
"//caffe2/torch/fb/rendezvous:zeus",
|
||||||
|
"//pytorch/vision:torchvision",
|
||||||
|
],
|
||||||
|
external_deps = [
|
||||||
|
("numpy", None),
|
||||||
|
("scipy", None),
|
||||||
|
],
|
||||||
|
**kwargs
|
||||||
|
)
|
||||||
|
|
||||||
|
def define_c10d_distributed_test(srcs, **kwargs):
|
||||||
|
srcs.extend(["fb/test_distributed_trap.py"])
|
||||||
|
define_distributed_test(
|
||||||
|
srcs = srcs + native.glob(["data/*.py"]),
|
||||||
|
**kwargs
|
||||||
|
)
|
22
test/distributed/fsdp/defs.bzl
Normal file
22
test/distributed/fsdp/defs.bzl
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load(
|
||||||
|
"//caffe2/test:defs.bzl",
|
||||||
|
"define_mp_tests",
|
||||||
|
)
|
||||||
|
|
||||||
|
def define_fsdp_tests():
|
||||||
|
test_files = native.glob(["**/test_*.py"])
|
||||||
|
|
||||||
|
TESTS = {}
|
||||||
|
|
||||||
|
additional_deps = {}
|
||||||
|
for test_file in test_files:
|
||||||
|
test_file_name = paths.basename(test_file)
|
||||||
|
test_name = test_file_name.replace("test_", "").replace(".py", "")
|
||||||
|
TESTS[test_name] = [test_file]
|
||||||
|
additional_deps[test_name] = ["//pytorch/vision:torchvision"]
|
||||||
|
|
||||||
|
define_mp_tests(
|
||||||
|
tests = TESTS,
|
||||||
|
additional_deps = additional_deps,
|
||||||
|
)
|
22
test/distributed/pipeline/sync/defs.bzl
Normal file
22
test/distributed/pipeline/sync/defs.bzl
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load(
|
||||||
|
"//caffe2/test:defs.bzl",
|
||||||
|
"define_tests",
|
||||||
|
)
|
||||||
|
|
||||||
|
def define_pipeline_tests():
|
||||||
|
test_files = native.glob(["**/test_*.py"])
|
||||||
|
|
||||||
|
TESTS = {}
|
||||||
|
|
||||||
|
for test_file in test_files:
|
||||||
|
test_file_name = paths.basename(test_file)
|
||||||
|
test_name = test_file_name.replace("test_", "").replace(".py", "")
|
||||||
|
TESTS[test_name] = [test_file]
|
||||||
|
|
||||||
|
define_tests(
|
||||||
|
pytest = True,
|
||||||
|
tests = TESTS,
|
||||||
|
external_deps = [("pytest", None)],
|
||||||
|
resources = ["conftest.py"],
|
||||||
|
)
|
31
third_party/tensorflow_cuda_bazel_build/cuda/build_defs.bzl
vendored
Executable file
31
third_party/tensorflow_cuda_bazel_build/cuda/build_defs.bzl
vendored
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
# Macros for building CUDA code.
|
||||||
|
def if_cuda(if_true, if_false = []):
|
||||||
|
"""Shorthand for select()'ing on whether we're building with CUDA.
|
||||||
|
|
||||||
|
Returns a select statement which evaluates to if_true if we're building
|
||||||
|
with CUDA enabled. Otherwise, the select statement evaluates to if_false.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return select({
|
||||||
|
"@local_config_cuda//cuda:using_clang": if_true,
|
||||||
|
"@local_config_cuda//cuda:using_nvcc": if_true,
|
||||||
|
"//conditions:default": if_false,
|
||||||
|
})
|
||||||
|
|
||||||
|
def cuda_default_copts():
|
||||||
|
"""Default options for all CUDA compilations."""
|
||||||
|
return if_cuda(["-x", "cuda", "-DGOOGLE_CUDA=1"] + [])
|
||||||
|
|
||||||
|
def cuda_is_configured():
|
||||||
|
"""Returns true if CUDA was enabled during the configure process."""
|
||||||
|
return True
|
||||||
|
|
||||||
|
def if_cuda_is_configured(x):
|
||||||
|
"""Tests if the CUDA was enabled during the configure process.
|
||||||
|
|
||||||
|
Unlike if_cuda(), this does not require that we are building with
|
||||||
|
--config=cuda. Used to allow non-CUDA code to depend on CUDA libraries.
|
||||||
|
"""
|
||||||
|
if cuda_is_configured():
|
||||||
|
return x
|
||||||
|
return []
|
12
tools/cpuinfo_target_definition.bzl
Normal file
12
tools/cpuinfo_target_definition.bzl
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
|
||||||
|
|
||||||
|
def add_cpuinfo_lib():
|
||||||
|
cpp_library(
|
||||||
|
name = "cpuinfo",
|
||||||
|
exported_deps = [
|
||||||
|
"fbsource//third-party/cpuinfo_sgx:cpuinfo_coffeelake",
|
||||||
|
] if is_sgx else [
|
||||||
|
"fbsource//third-party/cpuinfo:cpuinfo",
|
||||||
|
],
|
||||||
|
)
|
25
tools/miniz_target_definition.bzl
Normal file
25
tools/miniz_target_definition.bzl
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
|
||||||
|
|
||||||
|
def add_miniz_lib():
|
||||||
|
cpp_library(
|
||||||
|
name = "miniz",
|
||||||
|
srcs = [
|
||||||
|
"third_party/miniz-2.0.8/fb/FollyCrcPlugin.cpp",
|
||||||
|
"third_party/miniz-2.0.8/fb/miniz-fb.c",
|
||||||
|
],
|
||||||
|
headers = {
|
||||||
|
"caffe2/third_party/miniz-2.0.8/miniz.c": "third_party/miniz-2.0.8/miniz.c",
|
||||||
|
"miniz-fb.h": "third_party/miniz-2.0.8/fb/miniz-fb.h",
|
||||||
|
"miniz.h": "third_party/miniz-2.0.8/miniz.h",
|
||||||
|
},
|
||||||
|
header_namespace = "",
|
||||||
|
# -fexceptions is required, otherwise, when we use @mode/opt-clang-thinlto,
|
||||||
|
# c functions become noexcept, and we may not be able to catch exceptions
|
||||||
|
# during model loading.
|
||||||
|
compiler_flags = ["-DUSE_EXTERNAL_MZCRC", "-fexceptions"] + (["-DMINIZ_NO_STDIO"] if is_sgx else []),
|
||||||
|
# folly is only required as a dependency if USE_EXTERNAL_MZCRC
|
||||||
|
# above is defined, and FollyCrcPlugin.cpp is added.
|
||||||
|
# Neither are strictly needed, but run significantly faster.
|
||||||
|
exported_deps = ["//folly/hash:checksum"],
|
||||||
|
)
|
54
tools/perf_kernel_defs.bzl
Normal file
54
tools/perf_kernel_defs.bzl
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
|
||||||
|
is_dbg_build = native.read_config("fbcode", "build_mode", "").find("dbg") != -1
|
||||||
|
is_sanitizer = native.read_config("fbcode", "sanitizer", "") != ""
|
||||||
|
|
||||||
|
def define_perf_kernels(prefix, levels_and_flags, compiler_common_flags, dependencies, external_deps):
|
||||||
|
vectorize_flags = ([
|
||||||
|
# "-Rpass=loop-vectorize", # Add vectorization information to output
|
||||||
|
"-DENABLE_VECTORIZATION=1",
|
||||||
|
"-fveclib=SVML",
|
||||||
|
] if not is_dbg_build and not is_sanitizer else [])
|
||||||
|
|
||||||
|
compiler_specific_flags = {
|
||||||
|
"clang": vectorize_flags,
|
||||||
|
"gcc": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
compiler_specific_flags["clang"] += ["-Wno-pass-failed"]
|
||||||
|
|
||||||
|
common_srcs = native.glob(
|
||||||
|
["**/*.cc"],
|
||||||
|
exclude = [
|
||||||
|
"**/*_avx512.cc",
|
||||||
|
"**/*_avx2.cc",
|
||||||
|
"**/*_avx.cc",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cpp_headers = native.glob(
|
||||||
|
["**/*.h"],
|
||||||
|
)
|
||||||
|
|
||||||
|
kernel_targets = []
|
||||||
|
for level, flags in levels_and_flags:
|
||||||
|
cpp_library(
|
||||||
|
name = prefix + "perfkernels_" + level,
|
||||||
|
srcs = native.glob(["**/*_" + level + ".cc"]),
|
||||||
|
headers = cpp_headers,
|
||||||
|
compiler_flags = compiler_common_flags + flags,
|
||||||
|
compiler_specific_flags = compiler_specific_flags,
|
||||||
|
exported_deps = dependencies,
|
||||||
|
exported_external_deps = external_deps,
|
||||||
|
)
|
||||||
|
kernel_targets.append(":" + prefix + "perfkernels_" + level)
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = prefix + "perfkernels",
|
||||||
|
srcs = common_srcs,
|
||||||
|
headers = cpp_headers,
|
||||||
|
compiler_flags = compiler_common_flags,
|
||||||
|
compiler_specific_flags = compiler_specific_flags,
|
||||||
|
link_whole = True,
|
||||||
|
exported_deps = kernel_targets + dependencies,
|
||||||
|
)
|
9
tools/rules/METADATA.bzl
Normal file
9
tools/rules/METADATA.bzl
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN
|
||||||
|
# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL
|
||||||
|
# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF
|
||||||
|
# THIS PACKAGE.
|
||||||
|
# TPMS-GENERATED: b3448f8fd2a893772f944f37627e63917b77dede
|
||||||
|
METADATA = {
|
||||||
|
"name": "rules",
|
||||||
|
"owner": "pytorch_dev_infra",
|
||||||
|
}
|
261
tools/sgx_aten_target_definitions.bzl
Normal file
261
tools/sgx_aten_target_definitions.bzl
Normal file
@ -0,0 +1,261 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
|
||||||
|
load("//caffe2:build.bzl", "GENERATED_CPP")
|
||||||
|
load("//caffe2:build_variables.bzl", "jit_core_headers", "jit_core_sources")
|
||||||
|
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
|
||||||
|
|
||||||
|
default_compiler_flags = [
|
||||||
|
"-Wno-error=strict-aliasing",
|
||||||
|
"-Wno-unused-local-typedefs",
|
||||||
|
"-Wno-shadow-compatible-local",
|
||||||
|
"-Wno-maybe-uninitialized", # aten is built with gcc as part of HHVM
|
||||||
|
"-Wno-unknown-pragmas",
|
||||||
|
"-Wno-strict-overflow",
|
||||||
|
# See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/
|
||||||
|
# These trigger on platform007
|
||||||
|
"-Wno-stringop-overflow",
|
||||||
|
"-Wno-class-memaccess",
|
||||||
|
"-DHAVE_MMAP",
|
||||||
|
"-DUSE_GCC_ATOMICS=1",
|
||||||
|
"-D_FILE_OFFSET_BITS=64",
|
||||||
|
"-DHAVE_SHM_OPEN=1",
|
||||||
|
"-DHAVE_SHM_UNLINK=1",
|
||||||
|
"-DHAVE_MALLOC_USABLE_SIZE=1",
|
||||||
|
"-DTH_HAVE_THREAD",
|
||||||
|
"-DCPU_CAPABILITY_DEFAULT",
|
||||||
|
"-DTH_INDEX_BASE=0",
|
||||||
|
"-DMAGMA_V2",
|
||||||
|
"-DNO_CUDNN_DESTROY_HANDLE",
|
||||||
|
"-DUSE_QNNPACK",
|
||||||
|
"-DUSE_PYTORCH_QNNPACK",
|
||||||
|
# The dynamically loaded NVRTC trick doesn't work in fbcode,
|
||||||
|
# and it's not necessary anyway, because we have a stub
|
||||||
|
# nvrtc library which we load canonically anyway
|
||||||
|
"-DUSE_DIRECT_NVRTC",
|
||||||
|
"-DUSE_XNNPACK",
|
||||||
|
"-Wno-error=uninitialized",
|
||||||
|
]
|
||||||
|
|
||||||
|
compiler_specific_flags = {
|
||||||
|
"clang": [
|
||||||
|
"-Wno-absolute-value",
|
||||||
|
"-Wno-pass-failed",
|
||||||
|
"-Wno-braced-scalar-init",
|
||||||
|
],
|
||||||
|
"gcc": [
|
||||||
|
"-Wno-error=array-bounds",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
def add_sgx_aten_libs(ATEN_HEADERS_CPU_MKL, ATEN_SRCS_CPU_MKL, ATEN_CORE_CPP):
|
||||||
|
# we do not need to define these targets if we are in not SGX mode
|
||||||
|
if not is_sgx:
|
||||||
|
return
|
||||||
|
|
||||||
|
x64_compiler_flags = [
|
||||||
|
"-DUSE_SSE2",
|
||||||
|
"-DUSE_SSE3",
|
||||||
|
"-DUSE_SSE4_1",
|
||||||
|
"-DUSE_SSE4_2",
|
||||||
|
# dont enable AVX2 because we dont have runtime dispatch
|
||||||
|
"-DCPU_CAPABILITY_DEFAULT",
|
||||||
|
"-DCPU_CAPABILITY=DEFAULT",
|
||||||
|
"-DTH_INDEX_BASE=0",
|
||||||
|
"-DTH_INDEX_BASE=0",
|
||||||
|
"-msse",
|
||||||
|
"-msse2",
|
||||||
|
"-msse3",
|
||||||
|
"-msse4",
|
||||||
|
"-msse4.1",
|
||||||
|
"-msse4.2",
|
||||||
|
"-mavx",
|
||||||
|
"-mavx2",
|
||||||
|
]
|
||||||
|
|
||||||
|
cpu_preprocessor_flags = [
|
||||||
|
"-DATEN_MKLDNN_ENABLED_FBCODE=0",
|
||||||
|
"-DATEN_NNPACK_ENABLED_FBCODE=0",
|
||||||
|
"-DATEN_MKL_ENABLED_FBCODE=0",
|
||||||
|
"-DAT_BUILD_WITH_BLAS_FBCODE=1",
|
||||||
|
"-DAT_BLAS_USE_CBLAS_DOT_FBCODE=1",
|
||||||
|
"-DAT_BLAS_F2C_FBCODE=0",
|
||||||
|
"-DATEN_CUDNN_ENABLED_FBCODE=1",
|
||||||
|
"-DATEN_ROCM_ENABLED_FBCODE=0",
|
||||||
|
"-DC10_MOBILE",
|
||||||
|
"-DAT_PARALLEL_NATIVE_FBCODE=1",
|
||||||
|
]
|
||||||
|
|
||||||
|
custom_rule(
|
||||||
|
name = "generate-sgx-config",
|
||||||
|
srcs = [
|
||||||
|
"src/ATen/Config.h.in",
|
||||||
|
],
|
||||||
|
build_args = " ".join([
|
||||||
|
"--input-file",
|
||||||
|
"src/ATen/Config.h.in",
|
||||||
|
"--output-file",
|
||||||
|
"Config.h",
|
||||||
|
"--replace",
|
||||||
|
"@AT_MKLDNN_ENABLED@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_MKL_ENABLED@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_MKL_SEQUENTIAL@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_FFTW_ENABLED@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_POCKETFFT_ENABLED@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_NNPACK_ENABLED@",
|
||||||
|
"ATEN_NNPACK_ENABLED_FBCODE",
|
||||||
|
"--replace",
|
||||||
|
"@AT_BUILD_WITH_BLAS@",
|
||||||
|
"1",
|
||||||
|
"--replace",
|
||||||
|
"@AT_BUILD_WITH_LAPACK@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@CAFFE2_STATIC_LINK_CUDA_INT@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_BLAS_F2C@",
|
||||||
|
"AT_BLAS_F2C_FBCODE",
|
||||||
|
"--replace",
|
||||||
|
"@AT_BLAS_USE_CBLAS_DOT@",
|
||||||
|
"AT_BLAS_USE_CBLAS_DOT_FBCODE",
|
||||||
|
"--replace",
|
||||||
|
"@AT_PARALLEL_OPENMP@",
|
||||||
|
"0",
|
||||||
|
"--replace",
|
||||||
|
"@AT_PARALLEL_NATIVE@",
|
||||||
|
"1",
|
||||||
|
"--replace",
|
||||||
|
"@AT_PARALLEL_NATIVE_TBB@",
|
||||||
|
"0",
|
||||||
|
]),
|
||||||
|
build_script_dep = "//caffe2:substitute",
|
||||||
|
output_gen_files = ["Config.h"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "generated-sgx-config-header",
|
||||||
|
headers = [":generate-sgx-config=Config.h"],
|
||||||
|
header_namespace = "ATen",
|
||||||
|
)
|
||||||
|
|
||||||
|
ATEN_CORE_H = native.glob([
|
||||||
|
"src/ATen/core/*.h",
|
||||||
|
"src/ATen/core/boxing/*.h",
|
||||||
|
"src/ATen/core/boxing/impl/*.h",
|
||||||
|
"src/ATen/core/dispatch/*.h",
|
||||||
|
"src/ATen/core/op_registration/*.h",
|
||||||
|
]) + [
|
||||||
|
"src/ATen/CPUGeneratorImpl.h",
|
||||||
|
"src/ATen/NumericUtils.h",
|
||||||
|
]
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "ATen-core-sgx-headers",
|
||||||
|
headers = ATEN_CORE_H,
|
||||||
|
propagated_pp_flags = [
|
||||||
|
"-Icaffe2/aten/src",
|
||||||
|
],
|
||||||
|
exported_deps = [
|
||||||
|
"//caffe2:generated-aten-headers-core",
|
||||||
|
"//caffe2/c10:c10",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "ATen-sgx-core",
|
||||||
|
# Sorry, this is duped with GENERATED_CPP_CORE. I was too lazy to refactor
|
||||||
|
# the list into a bzl file
|
||||||
|
srcs = ATEN_CORE_CPP + [
|
||||||
|
":gen_aten=Operators_0.cpp",
|
||||||
|
":gen_aten=Operators_1.cpp",
|
||||||
|
":gen_aten=Operators_2.cpp",
|
||||||
|
":gen_aten=Operators_3.cpp",
|
||||||
|
":gen_aten=Operators_4.cpp",
|
||||||
|
":gen_aten=core/ATenOpList.cpp",
|
||||||
|
":gen_aten=core/TensorMethods.cpp",
|
||||||
|
],
|
||||||
|
headers = native.glob([
|
||||||
|
"src/ATen/*.h",
|
||||||
|
"src/ATen/ops/*.h",
|
||||||
|
"src/ATen/quantized/*.h",
|
||||||
|
]),
|
||||||
|
compiler_flags = default_compiler_flags,
|
||||||
|
compiler_specific_flags = compiler_specific_flags,
|
||||||
|
link_whole = True,
|
||||||
|
# Tests that fail in CPU static dispatch mode because they require
|
||||||
|
# the dispatcher in order to work can be gated out with `#ifndef
|
||||||
|
# ATEN_CPU_STATIC_DISPATCH`.
|
||||||
|
propagated_pp_flags = [],
|
||||||
|
# Must be linked with caffe2_core
|
||||||
|
undefined_symbols = True,
|
||||||
|
exported_deps = [
|
||||||
|
":ATen-core-sgx-headers",
|
||||||
|
"//caffe2:jit-core-sgx",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "ATen-sgx-cpu",
|
||||||
|
srcs = ATEN_SRCS_CPU_MKL + [":gen_aten=" + x for x in GENERATED_CPP],
|
||||||
|
headers = ATEN_HEADERS_CPU_MKL,
|
||||||
|
arch_compiler_flags = {"x86_64": x64_compiler_flags},
|
||||||
|
compiler_flags = default_compiler_flags,
|
||||||
|
compiler_specific_flags = compiler_specific_flags,
|
||||||
|
include_directories = [
|
||||||
|
"src",
|
||||||
|
"src/TH",
|
||||||
|
],
|
||||||
|
link_whole = True,
|
||||||
|
propagated_pp_flags = cpu_preprocessor_flags,
|
||||||
|
exported_deps = [
|
||||||
|
"fbsource//third-party/cpuinfo_sgx:cpuinfo_coffeelake",
|
||||||
|
":ATen-sgx-core",
|
||||||
|
":aten-headers-cpu",
|
||||||
|
":generated-aten-headers-cpu",
|
||||||
|
":generated-sgx-config-header",
|
||||||
|
":generated-sgx-th-general-header",
|
||||||
|
":generated-sgx-th-general-header-no-prefix",
|
||||||
|
"//caffe2/caffe2:caffe2_sgx_core",
|
||||||
|
"//caffe2/caffe2/perfkernels:sgx_perfkernels",
|
||||||
|
"//xplat/third-party/XNNPACK:XNNPACK",
|
||||||
|
],
|
||||||
|
exported_external_deps = [
|
||||||
|
("OpenBLAS", None, "OpenBLAS"),
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
"//caffe2/aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_sgx_aten_jit_libs():
|
||||||
|
# we do not need to define these targets if we are in not SGX mode
|
||||||
|
if not is_sgx:
|
||||||
|
return
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "jit-core-sgx",
|
||||||
|
# Sorry, this is duped with GENERATED_CPP_CORE. I was too lazy to refactor
|
||||||
|
# the list into a bzl file
|
||||||
|
srcs = jit_core_sources,
|
||||||
|
headers = jit_core_headers,
|
||||||
|
compiler_flags = default_compiler_flags,
|
||||||
|
compiler_specific_flags = compiler_specific_flags,
|
||||||
|
include_directories = [""],
|
||||||
|
link_whole = True,
|
||||||
|
# Must be linked with caffe2_core
|
||||||
|
undefined_symbols = True,
|
||||||
|
exported_deps = [
|
||||||
|
"//caffe2:ATen-core-sgx-headers",
|
||||||
|
"//caffe2/c10:c10",
|
||||||
|
],
|
||||||
|
)
|
253
tools/sgx_caffe2_target_definitions.bzl
Normal file
253
tools/sgx_caffe2_target_definitions.bzl
Normal file
@ -0,0 +1,253 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("//caffe2/caffe2:defs.bzl", "get_sgx_patterns")
|
||||||
|
load("//caffe2/tools:perf_kernel_defs.bzl", "define_perf_kernels")
|
||||||
|
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
|
||||||
|
|
||||||
|
def add_sgx_caffe_libs():
|
||||||
|
# we do not need to define these targets if we are in not SGX mode
|
||||||
|
if not is_sgx:
|
||||||
|
return
|
||||||
|
|
||||||
|
core_file_patterns = [
|
||||||
|
"core/allocator.cc",
|
||||||
|
"core/logging.cc",
|
||||||
|
"core/flags.cc",
|
||||||
|
"core/common.cc",
|
||||||
|
"core/context.cc",
|
||||||
|
"core/event.cc",
|
||||||
|
"core/context_base.cc",
|
||||||
|
"core/numa.cc",
|
||||||
|
"core/blob_serialization.cc",
|
||||||
|
"core/tensor.cc",
|
||||||
|
"core/types.cc",
|
||||||
|
"core/blob_stats.cc",
|
||||||
|
"opt/converter.cc",
|
||||||
|
"opt/annotations.cc",
|
||||||
|
"utils/cpuid.cc",
|
||||||
|
"utils/threadpool/ThreadPool.cc",
|
||||||
|
"utils/threadpool/pthreadpool-cpp.cc",
|
||||||
|
"utils/threadpool/thread_pool_guard.cpp",
|
||||||
|
"utils/proto_utils.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
core_srcs = native.glob(
|
||||||
|
core_file_patterns,
|
||||||
|
)
|
||||||
|
|
||||||
|
core_external_deps = [
|
||||||
|
"protobuf",
|
||||||
|
"glog",
|
||||||
|
"sparsehash",
|
||||||
|
"zstd",
|
||||||
|
]
|
||||||
|
|
||||||
|
core_internal_deps = [
|
||||||
|
"fbsource//third-party/fmt:fmt",
|
||||||
|
"//caffe/proto:fb_protobuf",
|
||||||
|
"//caffe2/caffe2/proto:fb_protobuf",
|
||||||
|
"//caffe2/c10:c10",
|
||||||
|
"//common/base:exception",
|
||||||
|
"//common/logging:logging",
|
||||||
|
]
|
||||||
|
|
||||||
|
internal_deps = core_internal_deps + [
|
||||||
|
# "//libfb/py/mkl:mkl_dep_handle_lp64",
|
||||||
|
"//onnx/onnx:onnx_lib",
|
||||||
|
"//foxi:foxi_loader",
|
||||||
|
"//caffe2/caffe2/fb/onnxifi:fbonnxifi_loader_stub",
|
||||||
|
# "//rocksdb:rocksdb",
|
||||||
|
"//caffe2:cpuinfo",
|
||||||
|
"//xplat/QNNPACK:QNNPACK",
|
||||||
|
"//folly/experimental/symbolizer:symbolizer",
|
||||||
|
"//folly/hash:hash",
|
||||||
|
"//folly/io:iobuf",
|
||||||
|
"//folly:conv",
|
||||||
|
"//folly:dynamic",
|
||||||
|
"//folly:executor",
|
||||||
|
"//folly:format",
|
||||||
|
"//folly:json",
|
||||||
|
"//folly:map_util",
|
||||||
|
"//folly:memory",
|
||||||
|
"//folly:mpmc_queue",
|
||||||
|
"//folly:optional",
|
||||||
|
"//folly:random",
|
||||||
|
"//folly:range",
|
||||||
|
"//folly/synchronization:rw_spin_lock",
|
||||||
|
"//folly:singleton",
|
||||||
|
"//folly:string",
|
||||||
|
"//folly:synchronized",
|
||||||
|
"//folly:thread_local",
|
||||||
|
"//folly:traits",
|
||||||
|
"//caffe2:ATen-core-headers",
|
||||||
|
# important dependency to claim space for future refactorings
|
||||||
|
"//caffe2:ATen-cpu",
|
||||||
|
"//caffe2/caffe2/perfkernels:perfkernels",
|
||||||
|
"//xplat/third-party/FP16:FP16",
|
||||||
|
"fbsource//third-party/neon2sse:neon2sse",
|
||||||
|
]
|
||||||
|
|
||||||
|
exclude = [
|
||||||
|
# hip files are obtained from defs_hip.bzl
|
||||||
|
# do not include in the cpu/cuda build
|
||||||
|
"**/hip/**/*",
|
||||||
|
"test/caffe2_gtest_main.cc",
|
||||||
|
"quantization/server/**/*",
|
||||||
|
"fb/async/comm/**/*",
|
||||||
|
"fb/monitoring/**/*",
|
||||||
|
"fb/session/**/*",
|
||||||
|
# utils/knobs.cc and utils/knob_patcher.cc are only used in the open-source build
|
||||||
|
# The internal build uses versions from fb/utils/ instead.
|
||||||
|
"utils/knobs.cc",
|
||||||
|
"utils/knob_patcher.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
core_file_patterns = [
|
||||||
|
"core/allocator.cc",
|
||||||
|
"core/logging.cc",
|
||||||
|
"core/flags.cc",
|
||||||
|
"core/common.cc",
|
||||||
|
"core/context.cc",
|
||||||
|
"core/event.cc",
|
||||||
|
"core/context_base.cc",
|
||||||
|
"core/numa.cc",
|
||||||
|
"core/blob_serialization.cc",
|
||||||
|
"core/tensor.cc",
|
||||||
|
"core/types.cc",
|
||||||
|
"core/blob_stats.cc",
|
||||||
|
"opt/converter.cc",
|
||||||
|
"opt/annotations.cc",
|
||||||
|
"utils/cpuid.cc",
|
||||||
|
"utils/threadpool/ThreadPool.cc",
|
||||||
|
"utils/threadpool/pthreadpool-cpp.cc",
|
||||||
|
"utils/threadpool/thread_pool_guard.cpp",
|
||||||
|
"utils/proto_utils.cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
test_file_patterns = get_sgx_patterns([
|
||||||
|
"_test.cc",
|
||||||
|
"_test.cpp",
|
||||||
|
])
|
||||||
|
|
||||||
|
gpu_file_patterns = get_sgx_patterns([
|
||||||
|
"_gpu.cc",
|
||||||
|
"_cudnn.cc",
|
||||||
|
])
|
||||||
|
|
||||||
|
cpu_file_patterns = get_sgx_patterns([
|
||||||
|
".cc",
|
||||||
|
".cpp",
|
||||||
|
])
|
||||||
|
|
||||||
|
cpp_srcs = native.glob(
|
||||||
|
cpu_file_patterns,
|
||||||
|
exclude = exclude + gpu_file_patterns + test_file_patterns + core_file_patterns,
|
||||||
|
)
|
||||||
|
|
||||||
|
pp_flags = [
|
||||||
|
"-Icaffe2",
|
||||||
|
"-Imodules",
|
||||||
|
"-DEIGEN_NO_DEBUG",
|
||||||
|
"-DCAFFE2_USE_GOOGLE_GLOG",
|
||||||
|
"-DCAFFE2_NO_CROSS_ARCH_WARNING",
|
||||||
|
"-DCAFFE2_USE_EXCEPTION_PTR",
|
||||||
|
# Work-around for incompatible thread pools in Caffe2 and NNPACK
|
||||||
|
"-DFBCODE_CAFFE2",
|
||||||
|
"-DUSE_PTHREADPOOL",
|
||||||
|
"-DC10_MOBILE",
|
||||||
|
]
|
||||||
|
|
||||||
|
compiler_flags = [
|
||||||
|
"-Wno-unknown-pragmas",
|
||||||
|
"-Wno-narrowing",
|
||||||
|
"-Wno-missing-braces",
|
||||||
|
"-Wno-strict-overflow",
|
||||||
|
"-mno-avx",
|
||||||
|
"-Wno-error=unused-result",
|
||||||
|
]
|
||||||
|
|
||||||
|
cpu_header_patterns = [
|
||||||
|
"**/*.h",
|
||||||
|
]
|
||||||
|
|
||||||
|
cpp_headers = native.glob(
|
||||||
|
cpu_header_patterns,
|
||||||
|
exclude = exclude,
|
||||||
|
)
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "caffe2_sgx_headers",
|
||||||
|
headers = cpp_headers,
|
||||||
|
propagated_pp_flags = pp_flags,
|
||||||
|
exported_deps = core_internal_deps + [
|
||||||
|
"//folly/io/async:async_base",
|
||||||
|
"//caffe2/aten:ATen-core-sgx-headers",
|
||||||
|
],
|
||||||
|
exported_external_deps = core_external_deps,
|
||||||
|
)
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "caffe2_sgx_core",
|
||||||
|
srcs = core_srcs + [
|
||||||
|
"serialize/inline_container.cc",
|
||||||
|
"serialize/crc.cc",
|
||||||
|
"serialize/file_adapter.cc",
|
||||||
|
"serialize/istream_adapter.cc",
|
||||||
|
"serialize/read_adapter_interface.cc",
|
||||||
|
],
|
||||||
|
compiler_flags = compiler_flags,
|
||||||
|
link_whole = True,
|
||||||
|
propagated_pp_flags = pp_flags,
|
||||||
|
exported_deps = core_internal_deps + [
|
||||||
|
"//caffe2/aten:ATen-sgx-core",
|
||||||
|
"//caffe2/caffe2/core/nomnigraph:nomnigraph",
|
||||||
|
"//xplat/third-party/pthreadpool:pthreadpool",
|
||||||
|
"//caffe2:miniz",
|
||||||
|
],
|
||||||
|
exported_external_deps = core_external_deps,
|
||||||
|
)
|
||||||
|
|
||||||
|
def add_sgx_perf_kernel_libs():
|
||||||
|
# we do not need to define these targets if we are in not SGX mode
|
||||||
|
if not is_sgx:
|
||||||
|
return
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"//caffe2/caffe2:caffe2_sgx_headers",
|
||||||
|
"//caffe2/aten:ATen-core-sgx-headers",
|
||||||
|
]
|
||||||
|
|
||||||
|
compiler_common_flags = [
|
||||||
|
"-DCAFFE2_PERF_WITH_AVX2",
|
||||||
|
"-DCAFFE2_PERF_WITH_AVX",
|
||||||
|
]
|
||||||
|
|
||||||
|
external_deps = []
|
||||||
|
|
||||||
|
# these are esentially disabled for hte sgx build but we still need them
|
||||||
|
# to avoid linking issues
|
||||||
|
levels_and_flags = ([
|
||||||
|
(
|
||||||
|
"avx2",
|
||||||
|
[
|
||||||
|
"-mavx2",
|
||||||
|
"-mfma",
|
||||||
|
"-mavx",
|
||||||
|
"-mf16c",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"avx",
|
||||||
|
[
|
||||||
|
"-mavx",
|
||||||
|
"-mf16c",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
])
|
||||||
|
|
||||||
|
define_perf_kernels(
|
||||||
|
prefix = "sgx_",
|
||||||
|
levels_and_flags = levels_and_flags,
|
||||||
|
compiler_common_flags = compiler_common_flags,
|
||||||
|
dependencies = dependencies,
|
||||||
|
external_deps = external_deps,
|
||||||
|
)
|
96
tools/sgx_target_definitions.bzl
Normal file
96
tools/sgx_target_definitions.bzl
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("@fbsource//tools/build_defs:buckconfig.bzl", "read_bool")
|
||||||
|
load(
|
||||||
|
"//caffe2:build_variables.bzl",
|
||||||
|
"core_sources_common",
|
||||||
|
"core_sources_full_mobile",
|
||||||
|
"core_trainer_sources",
|
||||||
|
"libtorch_extra_sources",
|
||||||
|
"libtorch_generated_sources",
|
||||||
|
)
|
||||||
|
|
||||||
|
is_sgx = read_bool("fbcode", "sgx_mode", False)
|
||||||
|
|
||||||
|
def libtorch_sgx_sources(gencode_pattern = ":generate-code[{}]"):
|
||||||
|
libtorch_core_mobile_sources = sorted(core_sources_common + core_sources_full_mobile + core_trainer_sources)
|
||||||
|
|
||||||
|
sgx_sources_to_exclude = [
|
||||||
|
"torch/csrc/jit/tensorexpr/llvm_codegen.cpp",
|
||||||
|
"torch/csrc/jit/tensorexpr/llvm_jit.cpp",
|
||||||
|
"torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp",
|
||||||
|
]
|
||||||
|
|
||||||
|
return libtorch_generated_sources(gencode_pattern) + [i for i in libtorch_core_mobile_sources if i not in sgx_sources_to_exclude] + [i for i in libtorch_extra_sources if i not in sgx_sources_to_exclude]
|
||||||
|
|
||||||
|
def add_sgx_torch_libs():
|
||||||
|
# we do not need to define these targets if we are in not SGX mode
|
||||||
|
if not is_sgx:
|
||||||
|
return
|
||||||
|
|
||||||
|
compiler_flags_cpu = [
|
||||||
|
"-DNO_CUDNN_DESTROY_HANDLE",
|
||||||
|
"-DPYTORCH_ONNX_CAFFE2_BUNDLE",
|
||||||
|
"-DTORCH_ENABLE_LLVM",
|
||||||
|
"-Wno-write-strings",
|
||||||
|
"-Wno-format",
|
||||||
|
"-Wno-strict-aliasing",
|
||||||
|
"-Wno-non-virtual-dtor",
|
||||||
|
"-Wno-shadow-compatible-local",
|
||||||
|
"-Wno-empty-body",
|
||||||
|
"-DUSE_XNNPACK",
|
||||||
|
]
|
||||||
|
|
||||||
|
propagated_pp_flags_cpu = [
|
||||||
|
"-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
|
||||||
|
"-DC10_MOBILE",
|
||||||
|
]
|
||||||
|
|
||||||
|
include_directories = [
|
||||||
|
"..",
|
||||||
|
".",
|
||||||
|
"torch/csrc/api/include",
|
||||||
|
"torch/csrc",
|
||||||
|
"torch/csrc/nn",
|
||||||
|
"torch/lib",
|
||||||
|
]
|
||||||
|
|
||||||
|
common_flags = {
|
||||||
|
"compiler_specific_flags": {
|
||||||
|
"clang": [
|
||||||
|
"-Wno-absolute-value",
|
||||||
|
"-Wno-expansion-to-defined",
|
||||||
|
"-Wno-pessimizing-move",
|
||||||
|
"-Wno-return-type-c-linkage",
|
||||||
|
"-Wno-unknown-pragmas",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"headers": native.glob(["torch/csrc/**/*.h", "torch/csrc/generic/*.cpp", "test/cpp/jit/*.h", "test/cpp/tensorexpr/*.h"]),
|
||||||
|
}
|
||||||
|
|
||||||
|
_libtorch_sgx_sources = list(libtorch_sgx_sources())
|
||||||
|
|
||||||
|
cpp_library(
|
||||||
|
name = "libtorch-sgx",
|
||||||
|
srcs = _libtorch_sgx_sources + [
|
||||||
|
"fb/supported_mobile_models/SupportedMobileModels.cpp",
|
||||||
|
"torch/csrc/jit/mobile/function.cpp",
|
||||||
|
"torch/csrc/jit/mobile/import.cpp",
|
||||||
|
"torch/csrc/jit/mobile/interpreter.cpp",
|
||||||
|
"torch/csrc/jit/mobile/module.cpp", # this is only needed to load the model from caffe2/test/cpp/lite_interpreter_runtime/delegate_test.ptl
|
||||||
|
],
|
||||||
|
link_whole = True,
|
||||||
|
include_directories = include_directories,
|
||||||
|
propagated_pp_flags = propagated_pp_flags_cpu,
|
||||||
|
exported_deps = [
|
||||||
|
":generated-autograd-headers",
|
||||||
|
":generated-version-header",
|
||||||
|
"//caffe2/aten:ATen-sgx-cpu",
|
||||||
|
"//caffe2/caffe2:caffe2_sgx_core",
|
||||||
|
"//onnx/onnx:onnx_lib",
|
||||||
|
],
|
||||||
|
exported_external_deps = [
|
||||||
|
("protobuf", None),
|
||||||
|
],
|
||||||
|
compiler_flags = compiler_flags_cpu,
|
||||||
|
**common_flags
|
||||||
|
)
|
568
tools/target_definitions.bzl
Normal file
568
tools/target_definitions.bzl
Normal file
@ -0,0 +1,568 @@
|
|||||||
|
# @lint-ignore-every BUCKLINT supress the warning for using native
|
||||||
|
load("@bazel_skylib//lib:paths.bzl", "paths")
|
||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("@fbcode_macros//build_defs:cpp_python_extension.bzl", "cpp_python_extension")
|
||||||
|
load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
|
||||||
|
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
|
||||||
|
load("@fbsource//tools/build_defs:glob_defs.bzl", "glob")
|
||||||
|
load(
|
||||||
|
"//caffe2:build_variables.bzl",
|
||||||
|
"glob_libtorch_python_sources",
|
||||||
|
"libtorch_cuda_sources",
|
||||||
|
"libtorch_nvfuser_generated_headers",
|
||||||
|
"libtorch_nvfuser_runtime_sources",
|
||||||
|
"libtorch_python_cuda_sources",
|
||||||
|
"libtorch_sources",
|
||||||
|
"torch_cpp_srcs",
|
||||||
|
)
|
||||||
|
load(
|
||||||
|
"//caffe2:defs_hip.bzl",
|
||||||
|
"get_hip_flags",
|
||||||
|
"hip_external_deps",
|
||||||
|
"hip_pp_flags",
|
||||||
|
)
|
||||||
|
load("//caffe2/caffe2/fb:defs_gpu.bzl", "gpu_library_selector", "gpu_library_targets", "is_amd_build")
|
||||||
|
load("//tools/build/buck:nccl_deps.bzl", "get_nccl_dependency")
|
||||||
|
|
||||||
|
def _path_to_filename(fname):
|
||||||
|
return paths.split_extension(paths.basename(fname))[0]
|
||||||
|
|
||||||
|
def use_kineto():
|
||||||
|
return native.host_info().os.is_linux and native.host_info().arch.is_x86_64 and not is_amd_build()
|
||||||
|
|
||||||
|
def add_torch_libs():
    """Declare the fbcode Buck targets that make up libtorch and its Python bindings.

    Defines, per backend (CPU / CUDA / HIP):
      * the core "libtorch*" C++ libraries,
      * the NVFuser runtime-header stringification rules,
      * the hipify code-generation rules for the HIP flavor,
      * the "torch-cpp*" C++ API libraries,
      * the "_C_impl*" Python-binding libraries and the "_C" /
        "_C_flatbuffer" extension modules.

    Returns an (empty) dict `r`; callers appear to rely only on the rule
    declarations made as a side effect of this macro.
    """
    r = {}

    torch_cpp_headers = glob(["torch/csrc/api/include/**/*.h"]) + ["torch/script.h"]
    libtorch_python_sources = glob_libtorch_python_sources()

    # Build-time configuration knobs read from .buckconfig.
    use_mpi = native.read_config("fbcode", "caffe2_use_mpi", None)
    enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))

    compiler_flags_cpu = [
        "-DUSE_C10D",
        "-DUSE_NUMPY",
        "-DUSE_SCALARS",
        "-DNO_CUDNN_DESTROY_HANDLE",
        "-DBUILD_CAFFE2",
        "-DTORCH_ENABLE_LLVM",
        "-Wno-write-strings",
        "-Wno-format",
        "-Wno-strict-aliasing",
        "-Wno-non-virtual-dtor",
        "-Wno-shadow-compatible-local",
        "-Wno-empty-body",
    ] + ([] if native.host_info().os.is_windows else [
        # XNNPACK depends on an updated version of pthreadpool interface, whose implementation
        # includes <pthread.h> - a header not available on Windows.
        "-DUSE_XNNPACK",
    ])

    # We should really include preprocessor flags here
    # instead of compiler_flags
    propagated_pp_flags_cpu = [
        "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
        "-DUSE_DISTRIBUTED",
        "-DUSE_C10D_GLOO",
        "-DUSE_RPC",
        "-DUSE_TENSORPIPE",
    ] + (
        ["-DUSE_C10D_MPI"] if use_mpi else []
    ) + (
        ["-DUSE_KINETO", "-DUSE_KINETO_UPDATED"] if use_kineto() else []
    ) + (
        ["-DENABLE_LIBKINETO_CLIENT"] if native.read_config("kineto", "enable_libkineto_client", "1") == "1" else []
    )

    compiler_flags_cuda = [
        "-DUSE_CUDNN",
        "-DUSE_NCCL",
    ]

    compiler_flags_hip = []

    propagated_pp_flags_cuda = [
        "-DUSE_CUDA",
        "-DUSE_C10D_NCCL",
    ]

    common_headers = glob([
        "torch/csrc/**/*.h",
        # c10d used to be a separate library whose includes ended in .hpp.
        "torch/csrc/distributed/c10d/*.hpp",
        "torch/csrc/generic/*.cpp",
    ]) + [
        "torch/csrc/deploy/Exception.h",
        "torch/csrc/deploy/deploy.h",
        "torch/csrc/deploy/elf_file.h",
        "torch/csrc/deploy/environment.h",
        "torch/csrc/deploy/interpreter/builtin_registry.h",
        "torch/csrc/deploy/interpreter/interpreter_impl.h",
        "torch/csrc/deploy/loader.h",
        "torch/csrc/deploy/mem_file.h",
        "torch/csrc/deploy/noop_environment.h",
        "torch/csrc/deploy/path_environment.h",
        "torch/csrc/deploy/unity/tests/test_unity.h",
        "torch/csrc/deploy/unity/xar_environment.h",
        "torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h",
        "test/cpp/jit/test_custom_class_registrations.h",
        "test/cpp/jit/test_utils.h",
        "test/cpp/tensorexpr/gtest_assert_float_eq.h",
        "test/cpp/tensorexpr/padded_buffer.h",
        "test/cpp/tensorexpr/test_base.h",
        "test/cpp/tensorexpr/test_utils.h",
    ]
    # This generated header is produced by a separate rule; keep it out of
    # the globbed header set.
    common_headers.remove("torch/csrc/jit/serialization/mobile_bytecode_generated.h")

    # kwargs shared by most cpp_library rules below (spread via **common_flags).
    common_flags = {
        "compiler_specific_flags": {
            "clang": [
                "-Wno-absolute-value",
                "-Wno-expansion-to-defined",
                "-Wno-pessimizing-move",
                "-Wno-return-type-c-linkage",
                "-Wno-unknown-pragmas",
            ],
        },
        "headers": common_headers,
    }

    include_directories = [
        "..",
        ".",
        "torch/csrc/api/include",
        "torch/csrc",
        # c10d used to be a separate library and its includes were c10d/Foo.hpp,
        # hence we now need this hack to keep supporting them.
        "torch/csrc/distributed",
        "torch/csrc/nn",
    ]

    _libtorch_sources = list(libtorch_sources())

    # Add the Gloo and TensorPipe backends specific to Facebook networking.
    _libtorch_sources.append("torch/csrc/distributed/c10d/fb/GlooDeviceFactory.cpp")
    _libtorch_sources.append("torch/csrc/distributed/rpc/fb/tensorpipe_agent.cpp")

    # --- Core CPU library ---------------------------------------------------
    cpp_library(
        name = "libtorch",
        srcs = _libtorch_sources + ([
            "torch/csrc/jit/serialization/flatbuffer_serializer.cpp",
            "torch/csrc/jit/serialization/flatbuffer_serializer_jit.cpp",
            "torch/csrc/jit/mobile/flatbuffer_loader.cpp",
        ] if enable_flatbuffer else []),
        link_whole = True,
        include_directories = include_directories,
        propagated_pp_flags = propagated_pp_flags_cpu + (["-DENABLE_FLATBUFFER"] if enable_flatbuffer else []),
        exported_deps = (
            [
                ":ATen-cpu",
                ":generated-autograd-headers",
                ":generated-lazy-headers",
                "//caffe2:version_cpp",
                "//caffe2/caffe2:caffe2_cpu",
                "//caffe2/caffe2/quantization/server:dnnlowp_ops",
                "//caffe2/caffe2/serialize:inline_container",
                "//caffe2/torch/lib/libshm:libshm",
                "//gloo:gloo",
                "//gloo/fb/transport/tls:tls",
                "//gloo/transport/tcp:tcp",
                "//tensorpipe:tensorpipe_cpu",
            ] + (["//kineto/libkineto:kineto"] if use_kineto() else []) +
            (["//caffe2:mobile_bytecode"] if enable_flatbuffer else [])
        ),
        exported_external_deps = [
            ("nanopb", None, "protobuf-nanopb"),
            ("protobuf", None),
            # LLVM components required because of -DTORCH_ENABLE_LLVM above
            # (NNC/TensorExpr JIT backends).
            ("llvm-fb", None, "LLVMAnalysis"),
            ("llvm-fb", None, "LLVMBPFAsmParser"),
            ("llvm-fb", None, "LLVMBPFCodeGen"),
            ("llvm-fb", None, "LLVMCodeGen"),
            ("llvm-fb", None, "LLVMCore"),
            ("llvm-fb", None, "LLVMExecutionEngine"),
            ("llvm-fb", None, "LLVMIRReader"),
            ("llvm-fb", None, "LLVMInstCombine"),
            ("llvm-fb", None, "LLVMInterpreter"),
            ("llvm-fb", None, "LLVMMC"),
            ("llvm-fb", None, "LLVMNVPTXCodeGen"),
            ("llvm-fb", None, "LLVMOrcJIT"),
            ("llvm-fb", None, "LLVMRISCVAsmParser"),
            ("llvm-fb", None, "LLVMRISCVCodeGen"),
            ("llvm-fb", None, "LLVMScalarOpts"),
            ("llvm-fb", None, "LLVMSupport"),
            ("llvm-fb", None, "LLVMTarget"),
            ("llvm-fb", None, "LLVMTransformUtils"),
            ("llvm-fb", None, "LLVMVectorize"),
            ("llvm-fb", None, "LLVMWebAssemblyAsmParser"),
            ("llvm-fb", None, "LLVMWebAssemblyCodeGen"),
            ("llvm-fb", None, "LLVMWebAssemblyInfo"),
            ("llvm-fb", None, "LLVMX86AsmParser"),
            ("llvm-fb", None, "LLVMX86CodeGen"),
            ("llvm-fb", None, "LLVMipo"),
        ] + ([("openmpi", None, "openmpi")] if use_mpi else []),
        compiler_flags = compiler_flags_cpu,
        **common_flags
    )

    # Below rules are used to stringify NVfuser runtime library into a header files
    python_binary(
        name = "nvfuser-stringify",
        srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
        base_module = "",
        main_module = "torch.csrc.jit.codegen.cuda.tools.stringify_file",
    )

    # files in libtorch_nvfuser_runtime_sources that are violating package boundaries
    # are mapped to their corresponding export_file rules.
    violation_paths_to_rule = {
        "aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh": ":aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh",
        "aten/src/ATen/cuda/detail/UnpackRaw.cuh": ":aten/src/ATen/cuda/detail/UnpackRaw.cuh",
    }

    # One genrule per runtime source: turn it into an embeddable C++ header.
    # NOTE: `name` here shadows nothing outside the loop (Starlark loop scope),
    # but it does shadow the macro-local meaning of "rule name" — it is a
    # source path here.
    for name in libtorch_nvfuser_runtime_sources:
        src_path = violation_paths_to_rule.get(name, name)
        filename = _path_to_filename(src_path)
        native.genrule(
            name = "gen-nvfuser-hdr={}.h".format(filename),
            srcs = {name: src_path},
            bash = "$(exe :nvfuser-stringify) -i $SRCDIR/{} -o $OUT".format(name),
            out = "{}.h".format(filename),
        )
    cpp_library(
        name = "generated-nvfuser-headers",
        headers = [":gen-nvfuser-hdr=" + x for x in libtorch_nvfuser_generated_headers],
        header_namespace = "nvfuser_resources",
    )

    # --- CUDA library -------------------------------------------------------
    _libtorch_cuda_sources = list(libtorch_cuda_sources)
    cpp_library(
        name = "libtorch_cuda",
        srcs = _libtorch_cuda_sources,
        link_whole = True,
        include_directories = include_directories,
        # TODO: putting USE_CUDA in propagated_pp_flags is error-prone
        propagated_pp_flags = propagated_pp_flags_cuda,
        exported_deps = [
            ":ATen",
            ":generated-aten-headers-cuda",
            ":generated-autograd-headers",
            ":generated-nvfuser-headers",
            ":libtorch",
            "//caffe2/caffe2:caffe2_cpu",
            "//caffe2/caffe2:caffe2_gpu",
            "//caffe2/torch/lib/libshm:libshm",
            "//gloo:gloo_gpu_cuda",
            "//tensorpipe:tensorpipe_cuda",
        ],
        exported_external_deps = [
            ("cudnn", None, "cudnn-lazy"),
            ("cuda", None, "nvToolsExt-lazy"),
            ("cuda", None, "nvrtc-lazy"),
            ("cuda", None, "nvrtc-builtins-lazy"),
        ] + get_nccl_dependency(),
        compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
        **common_flags
    )

    # --- HIP (AMD) library, generated from the CUDA sources via hipify ------
    # (original_paths, hipified_paths)
    libtorch_hip_headers_filter = torch_cpp_headers + [h for h in common_headers if any([h.startswith(d) for d in [
        # headers in the following directories are added to libtorch_hip_headers_filter
        # so that they are not hipified.
        "torch/csrc/deploy/",
        "torch/csrc/distributed/rpc/metrics/",
        "torch/csrc/jit/serialization/",
        "torch/cpp/jit/",
        "torch/cpp/tensorexpr/",
    ]])]
    libtorch_hip_sources = (libtorch_cuda_sources, [f.replace(".cu", ".hip") for f in libtorch_cuda_sources])
    libtorch_hip_headers = ([f for f in common_headers if f not in libtorch_hip_headers_filter],) * 2

    custom_rule(
        name = "fb_libtorch_hipify_gen",
        srcs = libtorch_hip_sources[0] + libtorch_hip_headers[0],
        build_args = "--source-dir= --hipify-dir= --copy-dir= --rewrite-cu-ext",
        build_script_dep = "//caffe2:fb_caffe2_hipify",
        output_gen_files = libtorch_hip_sources[1] + libtorch_hip_headers[1],
    )

    cpp_library(
        name = "libtorch_hip_headers",
        headers = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_headers[1]],
        header_namespace = "",
    )

    cpp_library(
        name = "libtorch_hip",
        srcs = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_sources[1]],
        headers = [f for f in common_headers if f in libtorch_hip_headers_filter],
        link_whole = True,
        propagated_pp_flags = hip_pp_flags,
        exported_deps = [
            ":generated-aten-headers-hip",
            ":generated-autograd-headers",
            ":generated-nvfuser-headers",
            ":libtorch",
            ":libtorch_hip_headers",
            "//caffe2:ATen-hip",
            "//caffe2/caffe2:caffe2_cpu",
            "//caffe2/caffe2:caffe2_gpu_hip",
            "//caffe2/torch/lib/libshm:libshm",
            "//gloo:gloo_gpu_hip",
            "//tensorpipe:tensorpipe_cpu",  # TODO: include a HIP version once it's developed
        ],
        exported_external_deps = hip_external_deps,
        compiler_flags = compiler_flags_cpu + compiler_flags_hip + [
            "-Wno-unused-result",
        ],
        hip_flags = ["-Wno-unused-result"] + get_hip_flags(),
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # Backend-dispatching aggregate over the three libtorch flavors.
    gpu_library_targets(
        name = "libtorch_gpu",
        deps_cpu = [
            ":libtorch",
        ],
        deps_cuda = [
            ":libtorch_cuda",
        ],
        deps_hip = [
            ":libtorch_hip",
        ],
        exclude_hip_target = False,
        extra_external_deps = [],
    )

    # torch-cpp is still conditionally compiled based on USE_CUDA. Ideally we'd
    # separate it out as an additive library instead.
    gpu_library_selector(
        name = "torch-cpp",
        deps_cpu = [":torch-cpp-cpu"],
        deps_cuda = [":torch-cpp-cuda"],
        deps_hip = [":torch-cpp-hip"],
        merge_cpu_deps = False,
        exclude_hip_target = False,
    )

    # USE_CUDA flag is propagated through propagated_pp_flags on libtorch
    cpp_library(
        name = "torch-cpp-cuda",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch_cuda",
            "//caffe2/torch/fb/init:init",
        ],
        exported_external_deps = [
            ("cuda", None, "cuda-lazy"),
            ("cudnn", None, "cudnn-lazy"),
        ],
    )

    cpp_library(
        name = "torch-cpp-hip",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch_hip",
            "//caffe2/torch/fb/init:init",
        ],
        exported_external_deps = hip_external_deps,
    )

    cpp_library(
        name = "torch-cpp-cpu",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch",
            "//caffe2/torch/fb/init:init",
        ],
    )

    # _C_impl is still conditionally compiled based on USE_CUDA. Ideally we'd
    # separate it out as an additive library instead.
    # TODO: split it into cpp and cuda parts similarly to libtorch
    gpu_library_selector(
        name = "_C_impl",
        deps_cpu = [":_C_impl_cpu"],
        deps_cuda = [":_C_impl_cuda"],
        deps_hip = [":_C_impl_hip"],
        merge_cpu_deps = False,
        exclude_hip_target = False,
    )

    cpp_library(
        name = "_C_impl_cpu",
        srcs = libtorch_python_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-cpu",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu,
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # This target is used to help get headers for compile-time deps for torch::deploy
    # libinterpreter.so build _without_ getting link-time deps, which are supplied
    # separately by the application that dlopens libinterpreter.so.
    #
    # We make use of the buck auto-generated #headers flavor of a target to accomplish this.
    #
    # However, since #headers flavor of target with srcs can't be used in all build modes, we
    # work around this limitation by using this 'pass-through' target, which has a usable
    # #headers flavor in all build modes.
    cpp_library(
        name = "headers_for_torch_python_deps",
        exported_deps = [
            ":_C_impl_cpu",
        ],
    )
    cpp_library(
        name = "headers_for_torch_python_cuda_deps",
        exported_deps = [
            ":_C_impl_cuda",
        ],
    )

    # This target compiles torch_python bindings, but skips the deps on actual
    # torch and python since those will be integrated specially in the wrapper for
    # libinterpreter.so used in torch::deploy
    cpp_library(
        name = "torch_python_without_torch",
        srcs = libtorch_python_sources + torch_cpp_srcs,
        undefined_symbols = True,
        preferred_linkage = "static",
        exported_deps = [
            ":headers_for_torch_python_deps#headers",
        ],
        exported_external_deps = [
            ("pybind11", None),
            ("frozenpython", None, "python-headers"),
        ],
        compiler_flags = compiler_flags_cpu + [
            # some code in the Python bindings compiles differently
            # when you are deploy
            "-DUSE_DEPLOY",
        ],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    cpp_library(
        name = "torch_python_cuda_without_torch",
        srcs = libtorch_python_sources + torch_cpp_srcs + libtorch_python_cuda_sources,
        undefined_symbols = True,
        preferred_linkage = "static",
        exported_deps = [
            ":headers_for_torch_python_cuda_deps#headers",
        ],
        exported_external_deps = [
            ("pybind11", None),
            ("frozenpython", None, "python-headers"),
        ],
        compiler_flags = compiler_flags_cpu + [
            "-DUSE_CUDA",
            # some code in the Python bindings compiles differently
            # when you are deploy
            "-DUSE_DEPLOY",
        ],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    cpp_library(
        name = "_C_impl_cuda",
        srcs = libtorch_python_sources + libtorch_python_cuda_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-cuda",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # Autogenerated files whose rules contain ":" are not hipified.
    libtorch_python_hip_sources = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" in f]
    libtorch_python_hip_sources_hipified = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if not ":" in f]

    custom_rule(
        name = "fb_C_impl_hipify_gen",
        srcs = libtorch_python_hip_sources_hipified,
        build_args = "--source-dir= --hipify-dir= --copy-dir=",
        build_script_dep = "//caffe2:fb_caffe2_hipify",
        output_gen_files = libtorch_python_hip_sources_hipified,
    )

    cpp_library(
        name = "_C_impl_hip",
        srcs = [":fb_C_impl_hipify_gen={}".format(f) for f in (libtorch_python_hip_sources_hipified)] + libtorch_python_hip_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-hip",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu + compiler_flags_hip + ["-Wno-unused-result"],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # --- Python extension modules ------------------------------------------
    cpp_python_extension(
        name = "_C",
        srcs = [
            "torch/csrc/stub.c",
        ],
        base_module = "torch",
        deps = [
            ":_C_impl",
            "//caffe2:flatbuffer_loader",
        ],
    )

    cpp_python_extension(
        name = "_C_flatbuffer",
        srcs = [
            "torch/csrc/stub_with_flatbuffer.c",
            "torch/csrc/init_flatbuffer_module.cpp",
        ],
        base_module = "torch",
        deps = [
            ":_C_impl",
            "//caffe2:flatbuffer_loader",
            "//caffe2:flatbuffer_serializer",
        ],
    )

    return r
|
117
torch/csrc/deploy/interpreter/defs.bzl
Normal file
117
torch/csrc/deploy/interpreter/defs.bzl
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary")
|
||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
|
||||||
|
|
||||||
|
# @lint-ignore-every BUCKLINT
|
||||||
|
load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
|
||||||
|
|
||||||
|
def embedded_interpreter(name, suffix, legacy = False, exported_deps = [], exported_external_deps = []):
    """Build an embeddable torch::deploy interpreter library.

    For each supported fbcode platform this links an interpreter .so
    (cpu, cuda, or "all" flavor per `suffix`), wraps it into a static
    archive whose payload lives in a dedicated ELF section, and finally
    exposes it as `name`, a cpp_library carrying --export-dynamic.

    Args:
        name: base name of the final cpp_library target.
        suffix: interpreter flavor - "cuda", "all", or anything else for cpu-only.
        legacy: if True, use the older cuda-named packaging genrule.
        exported_deps / exported_external_deps: extra deps forwarded to the
            final wrapper library.
    """
    final_name = name
    is_all = suffix == "all"
    # "all" implies cuda bindings as well.
    is_cuda = suffix == "cuda" or is_all
    platform_static_lib = []
    # One interpreter build per fbcode platform; selected at link time via
    # the platform_static_lib mapping below.
    for platform in ["platform009", "platform010"]:
        name = platform + "_" + final_name
        so_name = name + ".so"
        cpp_binary(
            name = so_name,
            srcs = [
                "interpreter_impl.cpp",
            ] + (["import_find_sharedfuncptr.cpp"] if is_all else []),
            headers = [
                "Optional.hpp",
                "interpreter_impl.h",
            ],
            header_namespace = "torch/csrc/deploy",
            dlopen_enabled = True,
            linker_flags = ([
                # This ensures only the intended interface symbols are public/global
                # the rest are hidden, regardless of how they were compiled
                # (e.g. fvisibility=hidden is NOT important for the component
                # objs in this library, since we override here.)
                "--version-script=$(location :hide_symbols.script)",
            ] if not is_all else []),
            deps = [
                "fbsource//third-party/fmt:fmt",
            ] + ([
                ":builtin_registry_cuda",
                "//caffe2:torch_python_cuda_without_torch",
                "//deeplearning/trt/python:frozen_tensorrt",
            ] if is_cuda else [
                ":builtin_registry",
                "//caffe2:torch_python_without_torch",
            ]),
            external_deps =
                [
                    # needed for interpreter.cpp itself, it uses pybind currently
                    ("frozenpython", None, "python-frozen"),
                    ("frozenpython", None, "python"),
                ],
            fbcode_platform = platform,
        )

        # We build torch::deploy with two embedded binaries- one with only cpu py bindings,
        # the other with cpu+cuda py bindings. This unfortunately wastes some binary size,
        # but at least at runtime only one of them is loaded.
        #
        # This is because of two reasons
        # (1) that applications such as predictor want to depend on torch::deploy in a
        # cuda-agnostic way, e.g. they don't choose yet, and a binary/app that depends
        # on predictor either chooses to include or not include a dep on cuda.
        #
        # (2) the way the embedded binary is created and loaded, it only exposes a small
        # set of interface symbols globally, for creating a new interpreter, and hides its
        # other symbols (esp. python ones) so they don't conflict with other interpreters.
        # This prevents dividing the cpu and cuda portions of bindings into _separate_ libs
        # and loading the cuda part additively. Hence to achieve requirement (1) we bundle
        # two complete interpreter libs, one with and one without cuda.

        # The "all" flavor strips DT_NEEDED entries from the .so before
        # embedding; other flavors just copy it.
        cp_cmd = "$(location //caffe2/torch/csrc/deploy:remove_dt_needed)" if suffix == "all" else "cp"

        build_name = "build_" + name
        if not legacy:
            # Embed the interpreter .so into a relocatable object, move the
            # payload into a non-allocated .torch_deploy_payload.* section,
            # drop the ld-generated start/end symbols, and archive it.
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_" + suffix + ".a",
                cmd = """\
""" + cp_cmd + """ $(location :""" + so_name + """) libtorch_deployinterpreter_internal_""" + suffix + """.so
ld -r -b binary -o ${TMP}/embedded_interpreter_""" + suffix + """.o libtorch_deployinterpreter_internal_""" + suffix + """.so
objcopy --rename-section .data=.torch_deploy_payload.interpreter_""" + suffix + """,readonly,contents -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_start -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_end ${TMP}/embedded_interpreter_""" + suffix + """.o
ar rcs ${OUT} ${TMP}/embedded_interpreter_""" + suffix + """.o
""",
            )
        else:
            # Legacy packaging: plain embedded object under the cuda name,
            # no section rename.
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_cuda_legacy.a",
                cmd = """\
cp $(location :""" + so_name + """) libtorch_deployinterpreter_cuda.so
ld -r -b binary -o ${TMP}/embedded_interpreter_cuda.o libtorch_deployinterpreter_cuda.so
ar rcs ${OUT} ${TMP}/embedded_interpreter_cuda.o
""",
            )
        platform_static_lib.append(["^" + platform, ":" + build_name])

    internal_name = final_name + "_internal"
    fb_native.prebuilt_cxx_library(
        preferred_linkage = "static",
        name = internal_name,
        visibility = ["PUBLIC"],
        link_whole = True,
        platform_static_lib = platform_static_lib,
    )

    # a thin wrapper around :embedded_interpreter_internal to add --export-dynamic
    # linker flags. The flag will be propagated to cpp_binary. We don't require
    # cpp_binary to explicitly enable --export-dynamic any more. New usecases usually
    # forgot to do so and caused interpreter not found crash.
    cpp_library(
        name = final_name,
        linker_flags = [
            "--export-dynamic",
        ],
        exported_deps = [
            ":" + internal_name,
        ] + exported_deps,
        exported_external_deps = exported_external_deps,
    )
|
46
torch/csrc/deploy/unity/unity.bzl
Normal file
46
torch/csrc/deploy/unity/unity.bzl
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
|
||||||
|
load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
|
||||||
|
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
|
||||||
|
|
||||||
|
# @lint-ignore-every BUCKLINT
|
||||||
|
load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
|
||||||
|
|
||||||
|
def build_unity(name, **kwargs):
    """Build a python_binary and embed it as a linkable torch::deploy payload.

    Besides the python_binary `name` itself, this declares:
      * "<name>_build_python_app_lib" - genrule wrapping the built app into a
        static archive whose bytes live in the .torch_deploy_payload.unity
        ELF section;
      * "<name>_python_app_lib" - prebuilt_cxx_library over that archive;
      * "<name>_unity_lib" - cpp_library combining the payload with the
        unity_core runtime, linked with --export-dynamic.
    """
    python_binary(name = name, **kwargs)

    archive_rule = name + "_build_python_app_lib"
    payload_rule = name + "_python_app_lib"

    cxx_genrule(
        name = archive_rule,
        out = "python_app.a",
        cmd = """\
cp $(location :""" + name + """) python_app
ld -r -b binary -o ${TMP}/python_app.o python_app
# rename the .data section to .torch_deploy_payload.unity.
# don't set the alloc/load flags for the section so it will not join
# the party of relocation.
# Also strip the _binary_python_app_start/end/size symbols to avoid
# confusion.
objcopy --rename-section .data=.torch_deploy_payload.unity,readonly,contents -N _binary_python_app_start -N _binary_python_app_end -N _binary_python_app_size ${TMP}/python_app.o
ar rcs ${OUT} ${TMP}/python_app.o
""",
    )

    # Expose the generated archive as a static library other rules can link.
    fb_native.prebuilt_cxx_library(
        name = payload_rule,
        visibility = ["PUBLIC"],
        link_whole = True,
        preferred_linkage = "static",
        static_lib = ":" + archive_rule,
    )

    # Final library: unity runtime plus the embedded python app payload.
    cpp_library(
        name = name + "_unity_lib",
        srcs = [],
        linker_flags = [
            "--export-dynamic",
        ],
        exported_deps = [
            "//caffe2/torch/csrc/deploy/unity:unity_core",
            ":" + payload_rule,
        ],
    )
|
Reference in New Issue
Block a user