Add all bzl files per D36874458

This commit is contained in:
Jon Janzen
2022-06-03 14:19:26 -07:00
parent 1f53d036d2
commit 03847808a0
34 changed files with 3888 additions and 0 deletions

19
android/build_defs.bzl Normal file
View File

@ -0,0 +1,19 @@
load("@fbsource//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test")
load("@fbsource//xplat/caffe2:pt_defs.bzl", "get_build_from_deps_query", "pt_operator_registry")
# Default operator-registry deps used when a test does not pass pt_op_deps
# explicitly; pulls in the full dev operator set.
DEFAULT_PT_OP_DEPS = [
    "fbsource//xplat/caffe2:torch_mobile_ops_full_dev",
]
def pt_xplat_cxx_test(name, deps = [], pt_op_deps = DEFAULT_PT_OP_DEPS, **kwargs):
    """Define a cxx test target, wiring in a PT operator registry when the
    build is configured to generate one from the deps query."""
    extra_deps = []
    if get_build_from_deps_query():
        registry_name = name + "_lib"
        pt_operator_registry(
            registry_name,
            preferred_linkage = "static",
            template_select = False,
            deps = pt_op_deps,
        )
        extra_deps = [":" + registry_name]
    fb_xplat_cxx_test(
        name = name,
        deps = deps + extra_deps,
        **kwargs
    )

29
c10/c10_defs.bzl Normal file
View File

@ -0,0 +1,29 @@
load("@fbsource//tools/build_defs:expect.bzl", "expect")
load(
"@fbsource//tools/build_defs/apple:build_mode_defs.bzl",
"is_production_build",
)
###############################################################################
# Check if we need to strip glog.
def _get_strip_glog_config():
    """Return the caffe2.strip_glog buckconfig value (default "1") as a bool."""
    raw_value = native.read_config("caffe2", "strip_glog", "1")
    expect(
        raw_value in ("0", "1"),
        raw_value,
    )
    return raw_value == "1"
# For iOS production builds (and all Android builds), strip GLOG logging to
# save size. We can disable by setting caffe2.strip_glog=0 in .buckconfig.local.
def get_fbobjc_strip_glog_flags():
    """GLOG-stripping compiler flags for fbobjc builds.

    Production builds (and any build with caffe2.strip_glog=1) strip log
    statements below severity 3 to save binary size.
    """
    flags = ["-UGOOGLE_STRIP_LOG"]
    if is_production_build() or _get_strip_glog_config():
        flags.append("-DGOOGLE_STRIP_LOG=3")
    return flags
def get_fbandroid_strip_glog_flags():
    """GLOG-stripping compiler flags for fbandroid builds (empty when disabled)."""
    if not _get_strip_glog_config():
        return []
    return ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"]

126
c10/defs_hip.bzl Normal file
View File

@ -0,0 +1,126 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load("//caffe2:defs_hip.bzl", "get_hip_file_path")
# Extensions treated as GPU translation units when globbing sources.
gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"]
# Extensions treated as GPU headers when globbing headers.
gpu_header_extensions = [".cuh", ".h", ".hpp"]
def is_test_files(filepath):
    """Return True if filepath lives under the top-level "test" folder.

    Idiom fix: return the predicate directly instead of the redundant
    if/else returning True/False literals.
    """
    return filepath.startswith("test")
def get_c10_hip_srcs():
    """Collect c10 CUDA sources (excluding the test folder) plus native .hip
    sources.

    Returns a pair (gpu_files, real_hip_files): the original source paths and
    the corresponding hipified output paths, prefixed with "c10".
    """
    glob_patterns = [
        base + suffix
        for base in c10_includes
        for suffix in gpu_file_extensions
    ]
    gpu_files = []
    hip_files = []
    for src in native.glob(glob_patterns):
        # exclude the test folder
        if is_test_files(src):
            continue
        gpu_files.append(src)
        hip_files.append(get_hip_file_path(paths.join("cuda/", src)))

    # Native hip files keep their .hip suffix and appear on both sides.
    native_hip_files = native.glob(["hip/**/*.hip"])
    gpu_files = gpu_files + native_hip_files
    hip_files = hip_files + native_hip_files

    # The hipify script runs under the caffe2 folder, so prepend "c10" so
    # that buck can find the hipified outputs.
    real_hip_files = [paths.join("c10", hip_src) for hip_src in hip_files]

    # return the src and output_gen files
    return gpu_files, real_hip_files
def get_c10_hip_headers():
    """Collect c10 CUDA headers (excluding the test folder) plus native hip
    headers.

    Returns a pair (gpu_files, real_hip_files): the original header paths and
    the corresponding hipified output paths, prefixed with "c10".
    """
    glob_patterns = [
        base + suffix
        for base in c10_includes
        for suffix in gpu_header_extensions
    ]
    gpu_files = []
    hip_files = []
    for hdr in native.glob(glob_patterns):
        # exclude the test folder
        if is_test_files(hdr):
            continue
        gpu_files.append(hdr)
        hip_files.append(get_hip_file_path(paths.join("cuda/", hdr)))

    # Native hip headers keep their suffix and appear on both sides.
    native_hip_files = native.glob([
        "hip/**/*" + suffix
        for suffix in gpu_header_extensions
    ])
    gpu_files = gpu_files + native_hip_files
    hip_files = hip_files + native_hip_files

    # The hipify script runs under the caffe2 folder, so prepend "c10" so
    # that buck can find the hipified outputs.
    real_hip_files = [paths.join("c10", hip_hdr) for hip_hdr in hip_files]

    # return the src and output_gen files
    return gpu_files, real_hip_files
def get_c10_hip_test_files():
    """Collect c10 CUDA test sources (only the test folder) plus native hip
    test files.

    Returns a pair (gpu_files, real_hip_files): the original source paths and
    the corresponding hipified output paths, prefixed with "c10".
    """
    glob_patterns = [
        base + suffix
        for base in c10_includes
        for suffix in gpu_file_extensions
    ]
    gpu_files = []
    hip_files = []
    for src in native.glob(glob_patterns):
        # keep only the test folder
        if not is_test_files(src):
            continue
        gpu_files.append(src)
        hip_files.append(get_hip_file_path(paths.join("cuda/", src)))

    # Native hip test files keep their suffix and appear on both sides.
    native_hip_files = native.glob([
        "hip/test/**/*" + suffix
        for suffix in gpu_header_extensions
    ])
    gpu_files = gpu_files + native_hip_files
    hip_files = hip_files + native_hip_files

    # The hipify script runs under the caffe2 folder, so prepend "c10" so
    # that buck can find the hipified outputs.
    real_hip_files = [paths.join("c10", hip_src) for hip_src in hip_files]

    # return the src and output_gen files
    return gpu_files, real_hip_files
c10_includes = ["**/*"]

276
c10/ovrsource_defs.bzl Normal file
View File

@ -0,0 +1,276 @@
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library")
# Platforms on which the CPU-only c10 targets can build.
cpu_supported_platforms = [
    "ovr_config//os:android",
    "ovr_config//os:iphoneos",
    "ovr_config//os:linux-x86_64",
    "ovr_config//os:macos",
    "ovr_config//os:windows-x86_64",
    "ovr_config//runtime:arm64-linux-ubuntu-neon",
]

# Platforms on which the CUDA c10 targets can build.
cuda_supported_platforms = [
    "ovr_config//os:linux-cuda",
    "ovr_config//os:windows-cuda",
]
def define_c10_ovrsource(name, is_mobile):
    """Define one oxx_static_library variant of c10 for ovrsource.

    Args:
        name: target name to define.
        is_mobile: when True, exports -DC10_MOBILE=1 to dependents.
    """
    if is_mobile:
        pp_flags = ["-DC10_MOBILE=1"]
    else:
        pp_flags = []

    oxx_static_library(
        name = name,
        srcs = native.glob([
            "core/*.cpp",
            "core/impl/*.cpp",
            "mobile/*.cpp",
            "util/*.cpp",
        ]),
        compatible_with = cpu_supported_platforms,
        compiler_flags = select({
            "DEFAULT": [],
            "ovr_config//compiler:cl": [
                "/w",
            ],
            "ovr_config//toolchain/clang:win": [
                "-Wno-error",
                "-Wno-shadow",
                "-Wno-undef",
                "-Wno-unused-variable",
            ],
        }),
        include_directories = [".."],
        preprocessor_flags = [
            "-DNO_EXPORT",
            "-DC10_BUILD_MAIN_LIB=1",
            "-DSUPPORTS_BACKTRACE=0",
        ],
        public_include_directories = [".."],
        public_preprocessor_flags = pp_flags,
        public_raw_headers = native.glob([
            "core/*.h",
            "macros/*.h",
            "mobile/*.h",
            "test/util/*.h",  # some external tests use this
            "util/*.h",
        ]),
        raw_headers = native.glob([
            "core/impl/*.h",
        ]),
        reexport_all_header_dependencies = False,
        # tests = C10_CPU_TEST_TARGETS,
        visibility = [
            "//xplat/caffe2/c10:c10_ovrsource",
        ],
        deps = select({
            "DEFAULT": [],
            # NUMA support is Linux-only.
            "ovr_config//os:linux": [
                "//third-party/numactl:numactl",
            ],
        }),
        exported_deps = [
            ":ovrsource_c10_cmake_macros.h",
            "//arvr/third-party/gflags:gflags",
            "//third-party/glog:glog",
            "//third-party/fmt:fmt",
        ],
    )
def define_ovrsource_targets():
    """Define all ovrsource c10 targets: generated cmake_macros headers, the
    mobile/full CPU variants (selected per-OS under :c10_ovrsource), and the
    CUDA library. Test-target generation is currently commented out.
    """
    # C10_CPU_TEST_FILES = native.glob([
    #     "test/core/*.cpp",
    #     "test/util/*.cpp",
    # ])

    # C10_GPU_TEST_FILES = native.glob([
    #     "cuda/test/**/*.cpp",
    # ])

    # C10_CPU_TEST_TARGETS = [
    #     ":" + paths.basename(test)[:-len(".cpp")] + "_ovrsource"
    #     for test in C10_CPU_TEST_FILES
    # ]

    # C10_GPU_TEST_TARGETS = [
    #     ":" + paths.basename(test)[:-len(".cpp")] + "_ovrsource"
    #     for test in C10_GPU_TEST_FILES
    # ]

    # cmake_macros.h defines shared by mobile and non-mobile builds.
    common_c10_cmake_defines = [
        ("#cmakedefine C10_BUILD_SHARED_LIBS", ""),
        ("#cmakedefine C10_DISABLE_NUMA", ""),
        ("#cmakedefine C10_USE_NUMA", ""),
        ("#cmakedefine C10_USE_MSVC_STATIC_RUNTIME", ""),
    ]

    # Mobile builds leave glog/gflags disabled.
    mobile_c10_cmake_defines = [
        ("#cmakedefine C10_USE_GLOG", ""),
        ("#cmakedefine C10_USE_GFLAGS", ""),
    ]

    # Non-mobile builds enable glog/gflags.
    non_mobile_c10_cmake_defines = [
        ("#cmakedefine C10_USE_GLOG", "#define C10_USE_GLOG 1"),
        ("#cmakedefine C10_USE_GFLAGS", "#define C10_USE_GFLAGS 1"),
    ]

    gen_cmake_header(
        src = "macros/cmake_macros.h.in",
        defines = common_c10_cmake_defines + mobile_c10_cmake_defines,
        header = "c10/macros/cmake_macros.h",
        prefix = "ovrsource_c10_mobile_",
    )

    gen_cmake_header(
        src = "macros/cmake_macros.h.in",
        defines = common_c10_cmake_defines + non_mobile_c10_cmake_defines,
        header = "c10/macros/cmake_macros.h",
        prefix = "ovrsource_c10_non_mobile_",
    )

    # Facade target selecting the mobile or non-mobile generated header per OS.
    oxx_static_library(
        name = "ovrsource_c10_cmake_macros.h",
        compatible_with = [
            "ovr_config//os:android",
            "ovr_config//os:iphoneos",
            "ovr_config//os:linux",
            "ovr_config//os:macos",
            "ovr_config//os:windows",
        ],
        deps = select({
            "ovr_config//os:android": [":ovrsource_c10_mobile_cmake_macros.h"],
            "ovr_config//os:iphoneos": [":ovrsource_c10_mobile_cmake_macros.h"],
            "ovr_config//os:linux": [":ovrsource_c10_non_mobile_cmake_macros.h"],
            "ovr_config//os:macos": [":ovrsource_c10_non_mobile_cmake_macros.h"],
            "ovr_config//os:windows": [":ovrsource_c10_non_mobile_cmake_macros.h"],
        }),
    )

    c10_cuda_macros = gen_cmake_header(
        src = "cuda/impl/cuda_cmake_macros.h.in",
        defines = [
            ("#cmakedefine C10_CUDA_BUILD_SHARED_LIBS", ""),
        ],
        header = "c10/cuda/impl/cuda_cmake_macros.h",
        prefix = "ovrsource",
    )

    # Public entry point: picks the mobile or full variant per platform.
    oxx_static_library(
        name = "c10_ovrsource",
        compatible_with = cpu_supported_platforms,
        exported_deps = select({
            "DEFAULT": [":c10_full_ovrsource"],
            "ovr_config//os:android": [":c10_mobile_ovrsource"],
            "ovr_config//os:iphoneos": [":c10_mobile_ovrsource"],
        }),
        visibility = ["PUBLIC"],
    )

    """
    Most users should use c10_ovrsource, not these targets directly.
    """
    define_c10_ovrsource("c10_mobile_ovrsource", True)
    define_c10_ovrsource("c10_full_ovrsource", False)

    oxx_static_library(
        name = "c10_cuda_ovrsource",
        srcs = native.glob([
            "cuda/*.cpp",
            "cuda/impl/*.cpp",
        ]),
        compatible_with = cuda_supported_platforms,
        compiler_flags = select({
            "DEFAULT": [],
            "ovr_config//compiler:cl": [
                "/w",
            ],
            "ovr_config//toolchain/clang:win": [
                "-Wno-error",
                "-Wno-shadow",
                "-Wno-undef",
                "-Wno-unused-variable",
            ],
        }),
        link_whole = True,
        preprocessor_flags = [
            "-DNO_EXPORT",
            "-DC10_CUDA_BUILD_MAIN_LIB=1",
        ],
        raw_headers = native.glob([
            "cuda/*.h",
            "cuda/impl/*.h",
        ]),
        reexport_all_header_dependencies = False,
        # tests = C10_GPU_TEST_TARGETS,
        visibility = ["PUBLIC"],
        deps = [
            "//third-party/cuda:libcuda",
            "//third-party/cuda:libcudart",
        ],
        exported_deps = c10_cuda_macros + [
            ":c10_ovrsource",
        ],
    )

    # [
    #     oxx_test(
    #         name = paths.basename(test)[:-len(".cpp")] + "_ovrsource",
    #         srcs = [test],
    #         compatible_with = cpu_supported_platforms,
    #         compiler_flags = select({
    #             "DEFAULT": [],
    #             "ovr_config//compiler:cl": [
    #                 "/w",
    #             ],
    #             "ovr_config//compiler:clang": [
    #                 "-Wno-error",
    #                 "-Wno-self-assign-overloaded",
    #                 "-Wno-self-move",
    #                 "-Wno-shadow",
    #                 "-Wno-undef",
    #                 "-Wno-unused-function",
    #                 "-Wno-unused-variable",
    #             ],
    #         }),
    #         framework = "gtest",
    #         oncall = "ovrsource_pytorch",
    #         raw_headers = native.glob([
    #             "test/**/*.h",
    #         ]),
    #         deps = [
    #             ":c10_ovrsource",
    #         ],
    #     )
    #     for test in C10_CPU_TEST_FILES
    # ]

    # [
    #     oxx_test(
    #         name = paths.basename(test)[:-len(".cpp")] + "_ovrsource",
    #         srcs = [test],
    #         compatible_with = cuda_supported_platforms,
    #         compiler_flags = select({
    #             "DEFAULT": [],
    #             "ovr_config//compiler:cl": [
    #                 "/w",
    #             ],
    #             "ovr_config//compiler:clang": [
    #                 "-Wno-error",
    #             ],
    #         }),
    #         framework = "gtest",
    #         oncall = "ovrsource_pytorch",
    #         raw_headers = native.glob([
    #             "test/**/*.h",
    #         ]),
    #         runtime_shared_libraries = [
    #             "//third-party/cuda:cudart",
    #         ],
    #         deps = [
    #             ":c10_cuda_ovrsource",
    #         ],
    #     )
    #     for test in C10_GPU_TEST_FILES
    # ]

549
c2_defs.bzl Normal file
View File

@ -0,0 +1,549 @@
load("@bazel_skylib//lib:collections.bzl", "collections")
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule")
load("@fbsource//tools/build_defs:default_platform_defs.bzl", "compose_platform_setting_list")
load("@fbsource//tools/build_defs:dict_defs.bzl", "dict_defs")
load("@fbsource//tools/build_defs:expect.bzl", "expect")
load("@fbsource//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library")
load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "CXX", "IOS", "MACOSX", "WINDOWS")
load("@fbsource//tools/build_defs/apple:build_mode_defs.bzl", "is_production_build")
load("@fbsource//tools/build_defs/apple:config_utils_defs.bzl", "STATIC_LIBRARY_IOS_CONFIG", "STATIC_LIBRARY_MAC_CONFIG", "fbobjc_configs")
load("@fbsource//tools/build_defs/apple:focus_config.bzl", "is_focus_enabled")
load("@fbsource//xplat/pfh/Msgr/Mobile/ProductInfra:DEFS.bzl", "Msgr_Mobile_ProductInfra")
def _read_c2_bool_config(key, default):
    """Read a "0"/"1"-valued flag from the [caffe2] buckconfig section.

    Args:
        key: config key under the "caffe2" section.
        default: string default, "0" or "1".

    Returns:
        The flag as a bool. Fails the build (via expect) if the configured
        value is anything other than "0" or "1".
    """
    raw_value = native.read_config("caffe2", key, default)
    expect(
        raw_value in ("0", "1"),
        raw_value,
    )
    return bool(int(raw_value))

def get_c2_expose_op_to_c10():
    # Whether Caffe2 ops are exposed through the c10 dispatcher (off by default).
    return _read_c2_bool_config("expose_op_to_c10", "0")

def get_c2_mpscnn():
    # Whether MPSCNN (Metal Performance Shaders) support is enabled.
    return _read_c2_bool_config("enable_mpscnn", "1")

def get_c2_mpscnn_test():
    # Whether the MPSCNN test hooks are compiled in (off by default).
    return _read_c2_bool_config("enable_mpscnn_test", "0")

def get_c2_nomnigraph():
    # Whether the nomnigraph optimizer is enabled.
    return _read_c2_bool_config("enable_nomnigraph", "1")

def get_c2_qpl():
    # Whether QPL logging is enabled.
    return _read_c2_bool_config("enable_qpl", "1")

def get_c2_strip_debug_info():
    # Whether debug info is stripped from binaries (off by default).
    return _read_c2_bool_config("strip_debug_info", "0")

def get_c2_strip_glog():
    # Whether GLOG logging is stripped to save size.
    return _read_c2_bool_config("strip_glog", "1")

def get_c2_tvm():
    # Whether TVM support is enabled.
    return _read_c2_bool_config("enable_tvm", "1")
# Base preprocessor flags for cross-platform Caffe2 builds without HPTT.
_C2_XPLAT_NO_HPTT_PREPROCESSOR_FLAGS = [
    "-fexceptions",
    "-frtti",
    "-Wno-shadow",
    "-Wno-unknown-pragmas",
    "-Wno-unused-variable",
    "-Wno-sign-compare",
    "-Icaffe2",
    "-Imodules",
    "-DEIGEN_NO_DEBUG",
    "-DCAFFE2_USE_LITE_PROTO",
    "-DCAFFE2_USE_GOOGLE_GLOG",
    "-DCAFFE2_RNN_NO_TEXT_FORMAT",
    "-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK=1",
    "-DCAFFE2_IS_XPLAT_BUILD",
    "-DSTRIP_ERROR_MESSAGES",
    "-DUSE_INTERNAL_PTHREADPOOL_IMPL",
]
def get_c2_xplat_no_hptt_preprocessor_flags():
    """Preprocessor flags for xplat builds without HPTT, adjusted per config."""
    result = list(_C2_XPLAT_NO_HPTT_PREPROCESSOR_FLAGS)
    if is_arvr_mode() and get_c2_strip_glog():
        result.extend(["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"])
    if get_c2_expose_op_to_c10():
        result.extend(["-DEXPOSE_C2_OPS", "-frtti"])
    return result
# Additional preprocessor flags for server (cxx) builds.
C2_XPLAT_SERVER_PREPROCESSOR_FLAGS = [
    "-DCAFFE2_USE_EIGEN_FOR_BLAS",
    "-DC10_DISABLE_SIGNAL_HANDLERS",
    "-DCAFFE2_DISABLE_NUMA",
]

# Extra define enabling HPTT (high-performance tensor transpose).
C2_XPLAT_HPTT_PREPROCESSOR_FLAGS = [
    "-DCAFFE2_USE_HPTT",
]
def get_c2_xplat_preprocessor_flags():
    """xplat preprocessor flags with HPTT, plus the optimizer define when
    nomnigraph is enabled."""
    result = get_c2_xplat_no_hptt_preprocessor_flags() + C2_XPLAT_HPTT_PREPROCESSOR_FLAGS
    if get_c2_nomnigraph():
        result = result + ["-DCAFFE2_OPTIMIZER"]
    return result
def get_c2_xplat_no_hptt_compiler_flags():
    """Size-optimized compiler flags for no-HPTT xplat builds."""
    base_flags = ["-Os"]
    return base_flags + get_c2_xplat_no_hptt_preprocessor_flags()
def get_c2_xplat_compiler_flags():
    """Compiler flags for xplat builds with HPTT enabled."""
    return (get_c2_xplat_no_hptt_compiler_flags() +
            C2_XPLAT_HPTT_PREPROCESSOR_FLAGS)
def get_c2_fbobjc_xplat_compiler_flags():
    """fbobjc xplat flags: schema/gradient stripping in production, GLOG
    stripping per config."""
    result = []
    if is_production_build():
        result += ["-DCAFFE2_NO_OPERATOR_SCHEMA", "-DCAFFE2_NO_GRADIENT_OPS"]

    # For iOS production builds (and all Android builds), strip GLOG logging to
    # save size. We can disable by setting caffe2.strip_glog=0 in .buckconfig.local.
    result.append("-UGOOGLE_STRIP_LOG")
    if is_production_build() or get_c2_strip_glog():
        result.append("-DGOOGLE_STRIP_LOG=3")
    return result
def get_c2_fbandroid_xplat_compiler_flags():
    """fbandroid xplat flags: warning suppressions plus optional glog/debug
    stripping."""
    result = [
        # T95767731 -- remove this once all builds are on at least llvm-13
        "-Wno-unknown-warning-option",
        "-Wno-unused-but-set-variable",
    ]
    if get_c2_strip_glog():
        result.extend(["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"])
    if get_c2_strip_debug_info():
        result.append("-g0")
    return result
# Base warning suppressions for fbobjc builds.
_C2_FBOBJC_COMPILER_FLAGS = [
    "-Wno-missing-prototypes",
    "-Wno-global-constructors",
    "-Wno-unknown-pragmas",
    "-Wno-invalid-partial-specialization",
    "-Wno-missing-braces",
    "-Wno-range-loop-analysis",
]
def get_c2_fbobjc_compiler_flags():
    """fbobjc compiler flags; the BLAS backend depends on arvr mode."""
    result = list(_C2_FBOBJC_COMPILER_FLAGS)

    # Avoid linking Accelerate on MacOS because we have
    # inconsistent LAPACK headers (see problems in D19257077).
    if is_arvr_mode():
        result.append("-DCAFFE2_USE_EIGEN_FOR_BLAS")
    else:
        result.append("-DCAFFE2_USE_ACCELERATE")

    if get_c2_mpscnn():
        # TODO(t19120552) - fix this. MPSCNNConvolutionDescriptor.strideInPixelsX
        # is marked as iOS 11+, but it's been available since iOS 10.
        result.append("-Wno-unguarded-availability")
    return result
# macOS x86 builds can assume SSE4.2.
C2_FBOBJC_MACOSX_COMPILER_FLAGS = [
    "-msse4.2",
]

# iPhone builds use NEON with half-precision support.
C2_FBOBJC_IPHONE_COMPILER_FLAGS = [
    "-mfpu=neon-fp16",
]
def get_c2_fbobjc_frameworks():
    """Apple frameworks to link; Accelerate only outside arvr mode."""
    if is_arvr_mode():
        return []
    return [
        # On iOS, presumably Accelerate is a faster BLAS
        "$SDKROOT/System/Library/Frameworks/Accelerate.framework",
    ]
def get_c2_fbobjc_ios_frameworks():
    """iOS-only frameworks; Metal is needed when MPSCNN is enabled."""
    if get_c2_mpscnn():
        return ["$SDKROOT/System/Library/Frameworks/Metal.framework"]
    return []
def get_c2_fbobjc_linker_flags():
    """Linker flags to weak-link MetalPerformanceShaders when MPSCNN is on."""
    if not get_c2_mpscnn():
        return []

    # Need linker flags as no platform_frameworks exist, and we can't
    # use MPSCNN on x86_64.
    # We use weak_framework as it's iOS 10
    return [
        "-L$SDKROOT/System/Library/Frameworks/MetalPerformanceShaders.framework",
        "-weak_framework",
        "MetalPerformanceShaders",
    ]
def get_c2_fbobjc_exported_preprocessor_flags():
    """Exported defines advertising MPSCNN (and its tests) to dependents."""
    mpscnn_flags = ["-DCAFFE2_USE_MPSCNN"] if get_c2_mpscnn() else []
    test_flags = ["-DCAFFE2_USE_MPSCNN_TEST"] if get_c2_mpscnn_test() else []
    return mpscnn_flags + test_flags
def get_c2_fbandroid_exported_preprocessor_flags():
    """Exported fbandroid defines; opt builds drop schema and gradient ops."""
    build_mode = native.read_config(
        "fbandroid",
        "build_mode",
        "dev",
    )
    if build_mode != "opt":
        return []
    return ["-DCAFFE2_NO_OPERATOR_SCHEMA", "-DCAFFE2_NO_GRADIENT_OPS"]
# Base fbandroid compiler flags (Eigen BLAS + warning suppressions).
C2_FBANDROID_COMPILER_FLAGS = [
    "-DCAFFE2_USE_EIGEN_FOR_BLAS",
    "-Wno-unknown-pragmas",
    "-Wno-deprecated-declarations",
    "-Wno-invalid-partial-specialization",
    "-Wno-missing-braces",
]

# armv7 builds use NEON with half-precision support.
C2_FBANDROID_ARMV7_COMPILER_FLAGS = [
    "-mfpu=neon-fp16",
]

# x86 builds can assume SSSE3.
C2_FBANDROID_X86_COMPILER_FLAGS = [
    "-mssse3",
]

# No extra fbandroid linker flags currently.
C2_FBANDROID_LINKER_FLAGS = []

# Extra Xcode target config for fbobjc builds (Metal shading language rev).
C2_FBOBJC_EXTRA_TARGET_CONFIG = {
    "MTL_LANGUAGE_REVISION": "Metal12",
}
def get_c2_default_cxx_args():
    """Return the default keyword-argument dict shared by Caffe2 cxx targets.

    A fresh dict is built per call so callers may mutate it freely.
    """
    return dict(
        header_namespace = "",
        apple_sdks = (IOS, MACOSX),
        compiler_flags = get_c2_xplat_compiler_flags(),
        fbandroid_compiler_flags = C2_FBANDROID_COMPILER_FLAGS + get_c2_fbandroid_xplat_compiler_flags(),
        fbandroid_exported_platform_preprocessor_flags = [
            (
                "android-armv7",
                get_c2_fbandroid_exported_preprocessor_flags(),
            ),
        ],
        fbandroid_linker_flags = C2_FBANDROID_LINKER_FLAGS,
        fbandroid_platform_compiler_flags = [
            ("android-armv7", C2_FBANDROID_ARMV7_COMPILER_FLAGS),
            (".*x86.*", C2_FBANDROID_X86_COMPILER_FLAGS),
        ],
        fbobjc_compiler_flags = get_c2_fbobjc_compiler_flags() + get_c2_fbobjc_xplat_compiler_flags(),
        fbobjc_configs = fbobjc_configs(
            STATIC_LIBRARY_IOS_CONFIG,
            extra_target_config = C2_FBOBJC_EXTRA_TARGET_CONFIG,
        ),
        fbobjc_exported_platform_linker_flags = [
            (
                "iphoneos",
                get_c2_fbobjc_linker_flags(),
            ),
        ],
        fbobjc_exported_platform_preprocessor_flags = [
            (
                "iphoneos",
                get_c2_fbobjc_exported_preprocessor_flags(),
            ),
        ],
        fbobjc_frameworks = get_c2_fbobjc_frameworks() + get_c2_fbobjc_ios_frameworks(),
        fbobjc_platform_compiler_flags = [
            ("iphoneos", C2_FBOBJC_IPHONE_COMPILER_FLAGS),
        ],
        macosx_compiler_flags = C2_FBOBJC_MACOSX_COMPILER_FLAGS,
        fbobjc_macosx_configs_override = fbobjc_configs(
            STATIC_LIBRARY_MAC_CONFIG,
        ),
        macosx_frameworks_override = get_c2_fbobjc_frameworks(),
        preprocessor_flags = [
            # Use the internal pthreadpool impl for all Caffe2 targets on all
            # platforms but do not export the preprocessor flag downstream.
            "-DUSE_INTERNAL_PTHREADPOOL_IMPL",
        ],
        visibility = ["PUBLIC"],
        windows_preferred_linkage = "static" if is_arvr_mode() else None,
        xcode_public_headers_symlinks = True,
    )
def get_c2_aten_cpu_fbobjc_macosx_deps():
    """Extra aten_cpu macOS deps, only needed under focus2."""
    # focus2 is broken when using platform deps (T80070498) so in the case
    # where it's focus2 we just add fbgemm as a standard dep. Otherwise we
    # use platform deps to select correctly for arm64.
    if not is_focus_enabled():
        return []
    return [
        "fbsource//xplat/deeplearning/fbgemm:fbgemm",
        "fbsource//xplat/caffe2:cpukernel_avx2",
    ]
def get_c2_aten_cpu_fbobjc_macosx_platform_deps():
    """Per-CPU macOS platform deps for aten_cpu (empty under focus2)."""
    if is_focus_enabled():
        # focus2 is broken when using platform deps (T80070498) so in the case
        # where it's focus2 we just add fbgemm as a standard dep. Otherwise we
        # use platform deps to select correctly for arm64.
        return []
    else:
        return compose_platform_setting_list([
            {
                "cpu": "x86_64",
                "flags": [
                    "fbsource//xplat/deeplearning/fbgemm:fbgemmAppleMac",
                ] + ([
                    "fbsource//xplat/caffe2:cpukernel_avx2AppleMac",
                ] if not is_arvr_mode() else []),
                "os": "macosx",
            },
            {
                # Apple Silicon gets XNNPACK instead of fbgemm/avx kernels.
                "cpu": "arm64",
                "flags": ["fbsource//xplat/third-party/XNNPACK:XNNPACKAppleMac"],
                "os": "macosx",
            },
        ])
def c2_cxx_library(**kwargs):
    """fb_xplat_cxx_library wrapper that applies the Caffe2 default args."""
    merged_args = get_c2_default_cxx_args()
    merged_args.update(kwargs)
    merged_args.setdefault("platforms", (ANDROID, APPLE, CXX, WINDOWS))
    fb_xplat_cxx_library(
        labels = [
            "supermodule:android/default/caffe2",
            "supermodule:ios/default/public.caffe2",
        ],
        feature = Msgr_Mobile_ProductInfra,
        **merged_args
    )
def c2_protobuf_rule(protos):
    """Generate lite-runtime protobuf C++ sources/headers for each proto.

    For each input proto this defines three genrules: one running protoc
    (after forcing `option optimize_for = LITE_RUNTIME;` on non-Windows and
    rewriting the caffe2.proto import path), and two copying out the
    generated .pb.cc / .pb.h with the header include path fixed up.

    Returns:
        (cpps, headers, raw_headers): source targets, namespaced header map,
        and flat header map.
    """
    cpps = []
    headers = {}
    raw_headers = {}
    for p in protos:
        proto = paths.basename(p)
        if native.host_info().os.is_windows:
            # Windows delegates all the munging to a PowerShell helper script.
            protocexe = "$(exe fbsource//third-party/protobuf:protoc-host)" if is_arvr_mode() else "$(location fbsource//xplat/third-party/protobuf:protoc.Windows)"
            protocmd = "powershell.exe -file $(location fbsource//xplat/caffe2/scripts:proto)\\proto.ps1 -Protoc {} -Unprocessed $SRCDIR/{} -Processed $SRCDIR/{} -out $OUT -srcdir $SRCDIR".format(protocexe, p, proto)
        else:
            protocmd = ("cp $SRCDIR/{} $SRCDIR/{} && chmod +w $SRCDIR/{} && echo \"option optimize_for = LITE_RUNTIME;\" >> $SRCDIR/{} && ".format(p, proto, proto, proto) +
                        "cp $SRCDIR/caffe2/proto/caffe2.proto $SRCDIR/caffe2.proto && chmod +w $SRCDIR/caffe2.proto && echo \"option optimize_for = LITE_RUNTIME;\" >> $SRCDIR/caffe2.proto && " +
                        "sed -i -e 's/caffe2\\/proto\\/caffe2.proto/caffe2.proto/g' $SRCDIR/{} && ".format(proto) +
                        ("$(exe fbsource//third-party/protobuf:protoc-host) " if is_arvr_mode() else "$(exe fbsource//xplat/third-party/protobuf:protoc) --osx $(location fbsource//xplat/third-party/protobuf:protoc.Darwin) --linux $(location fbsource//xplat/third-party/protobuf:protoc.Linux) ") +
                        "-I $SRCDIR --cpp_out=$OUT $SRCDIR/{}".format(proto))
        buck_genrule(
            name = proto,
            srcs = sorted(collections.uniq([p, "caffe2/proto/caffe2.proto"])),
            cmd_exe = protocmd,
            bash = protocmd,
            out = ".",
        )
        (name, _) = paths.split_extension(proto)
        cpp = name + ".pb.cc"
        h = name + ".pb.h"
        # Copy the generated header out, rewriting the generated include of
        # caffe2.pb.h to the namespaced caffe2/proto/caffe2.pb.h path.
        buck_genrule(
            name = h,
            cmd_exe = "@powershell -Command \" & { " + "(Get-Content $(location :{})\\{}".format(proto, h) + ") -replace \\\"caffe2.pb.h\\\", \\\"caffe2/proto/caffe2.pb.h\\\" | Set-Content $OUT } \"",
            bash = "cp -f $(location :{})/{} $OUT && ".format(proto, h) +
                   "sed -i -e 's/caffe2.pb.h/caffe2\\/proto\\/caffe2.pb.h/g' $OUT",
            out = h,
        )
        headers["caffe2/proto/" + h] = ":{}".format(h)
        raw_headers[h] = ":{}".format(h)
        # Copy the generated source out unchanged.
        buck_genrule(
            name = cpp,
            cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(proto, cpp),
            bash = "cp -f $(location :{})/{} $OUT".format(proto, cpp),
            out = cpp,
        )
        cpps.append(":{}".format(cpp))
    return (cpps, headers, raw_headers)
# C2 uses the lite version of protobuf while torch/jit uses some methods that
# only exist in full protobuf. This is a temporary workaround to enable
# experimental builds. DO NOT USE IT IN A PRODUCTION BUILD!
def c2_full_protobuf_rule(protos):
    """Like c2_protobuf_rule, but compiles with the FULL protobuf runtime.

    Targets are namespaced with a "full_" prefix so they can coexist with the
    lite variants. The non-Windows command omits the LITE_RUNTIME option
    injection that c2_protobuf_rule performs.
    """
    prefix = "full_"
    cpps = []
    headers = {}
    raw_headers = {}
    for p in protos:
        proto = paths.basename(p)
        if native.host_info().os.is_windows:
            protocexe = "$(exe fbsource//third-party/protobuf:protoc-host)" if is_arvr_mode() else "$(location fbsource//xplat/third-party/protobuf:protoc.Windows)"
            protocmd = "powershell.exe -file $(location fbsource//xplat/caffe2/scripts:proto)\\proto.ps1 -Protoc {} -Unprocessed $SRCDIR/{} -Processed $SRCDIR/{} -out $OUT -srcdir $SRCDIR".format(protocexe, p, proto)
        else:
            protocmd = ("cp $SRCDIR/{} $SRCDIR/{} && ".format(p, proto) +
                        "cp $SRCDIR/caffe2/proto/caffe2.proto $SRCDIR/caffe2.proto && " +
                        "sed -i -e 's/caffe2\\/proto\\/caffe2.proto/caffe2.proto/g' $SRCDIR/{} && ".format(proto) +
                        ("$(exe fbsource//third-party/protobuf:protoc-host) " if is_arvr_mode() else "$(exe fbsource//xplat/third-party/protobuf:protoc) --osx $(location fbsource//xplat/third-party/protobuf:protoc.Darwin) --linux $(location fbsource//xplat/third-party/protobuf:protoc.Linux) ") +
                        "-I $SRCDIR --cpp_out=$OUT $SRCDIR/{}".format(proto))
        # NOTE(review): unlike c2_protobuf_rule this passes a single `cmd`
        # rather than cmd_exe/bash -- presumably intentional, but confirm the
        # Windows branch actually runs under `cmd`.
        buck_genrule(
            name = prefix + proto,
            srcs = sorted(collections.uniq([p, "caffe2/proto/caffe2.proto"])),
            cmd = protocmd,
            out = ".",
        )
        (name, _) = paths.split_extension(proto)
        cpp = name + ".pb.cc"
        h = name + ".pb.h"
        # Copy the generated header out, rewriting the generated include of
        # caffe2.pb.h to the namespaced caffe2/proto/caffe2.pb.h path.
        buck_genrule(
            name = prefix + h,
            cmd_exe = "@powershell -Command \" & { " + "(Get-Content $(location :{})\\{}".format(prefix + proto, h) + ") -replace \\\"caffe2.pb.h\\\", \\\"caffe2/proto/caffe2.pb.h\\\" | Set-Content $OUT } \"",
            bash = "cp -f $(location :{})/{} $OUT && ".format(prefix + proto, h) +
                   "sed -i -e 's/caffe2.pb.h/caffe2\\/proto\\/caffe2.pb.h/g' $OUT",
            out = h,
        )
        headers["caffe2/proto/" + h] = ":{}".format(prefix + h)
        raw_headers[h] = ":{}".format(prefix + h)
        # Copy the generated source out unchanged.
        buck_genrule(
            name = prefix + cpp,
            cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(prefix + proto, cpp),
            bash = "cp -f $(location :{})/{} $OUT".format(prefix + proto, cpp),
            out = cpp,
        )
        cpps.append(":{}".format(prefix + cpp))
    return (cpps, headers, raw_headers)
def libcaffe2_cxx_library(name, use_hptt, **kwargs):
    """Define the core Caffe2 library target.

    Args:
        name: target name.
        use_hptt: when True, build with HPTT-enabled compiler/preprocessor flags.
    """
    c2_cxx_library(
        name = name,
        exported_deps = [
            "fbsource//xplat/caffe2/c10:c10",
            "fbsource//third-party/protobuf:libprotobuf" if is_arvr_mode() else "fbsource//xplat/third-party/protobuf:fb-protobuf-lite",
            ":caffe2_protobuf_headers",
            ":pthreadpool",
            ":common_core",
            ":caffe2_proto_types",
        ],
        compiler_flags = get_c2_xplat_compiler_flags() if use_hptt else get_c2_xplat_no_hptt_compiler_flags(),
        exported_preprocessor_flags = get_c2_xplat_preprocessor_flags() if use_hptt else get_c2_xplat_no_hptt_preprocessor_flags(),
        cxx_preprocessor_flags = C2_XPLAT_SERVER_PREPROCESSOR_FLAGS,
        fbandroid_exported_preprocessor_flags = get_c2_fbandroid_xplat_compiler_flags(),
        fbobjc_exported_preprocessor_flags = get_c2_fbobjc_xplat_compiler_flags(),
        # Hack to work around lack of platform_srcs support in Xcode project generation.
        macosx_extra_xcode_sources_override = [],
        link_whole = True,
        **kwargs
    )
def c2_operator_library(name, **kwargs):
    """Define a Caffe2 operator library target (a shared lib named lib<name>).

    Extends the caller's deps with folly/glog/caffe2, and aten_cpu when ops
    are exposed to c10.
    """
    dict_defs.key_extend(
        kwargs,
        "deps",
        [
            "fbsource//xplat/folly:molly",
            "fbsource//third-party/glog:glog",
            ":caffe2",
        ] + ([":aten_cpu"] if get_c2_expose_op_to_c10() else []),
    )

    # NOTE: Currently operators can "depend" on other operators, which is used
    # so that loading one will implicitly load the dependencies. So, make sure
    # that no `--as-needed` flags pulled in from dependencies cause these
    # operator deps to get dropped.
    linker_flags = [
        "-Wl,--no-as-needed",
    ]
    c2_cxx_library(
        name = name,
        soname = "lib" + name + ".$(ext)",
        fbandroid_compiler_flags = get_c2_default_cxx_args()["fbandroid_compiler_flags"] + ["-Os"],
        fbobjc_compiler_flags = get_c2_default_cxx_args()["fbobjc_compiler_flags"] + ["-Oz", "-DCOMPILING_FOR_MIN_SIZE=1"],
        link_whole = True,
        cxx_exported_linker_flags = linker_flags,
        fbandroid_exported_linker_flags = linker_flags,
        exported_deps = [
            ":caffe2",
        ],
        **kwargs
    )
def c2_genrule(genrule, genfiles, prefix = "", src_path = "", header_namespace = ""):
    """Expose each file produced by `genrule` as its own copy target.

    Returns {"headers": {namespaced_name: rule}, "srcs": [rules]}.
    """
    headers = {}
    srcs = []
    for genfile in genfiles:
        source_in_rule = src_path + genfile
        buck_genrule(
            name = prefix + genfile,
            bash = "cp -f $(location :{})/{} $OUT".format(genrule, source_in_rule),
            cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(genrule, source_in_rule),
            out = genfile,
        )
        rule = ":{}{}".format(prefix, genfile)
        headers[header_namespace + genfile] = rule
        srcs.append(rule)
    return {"headers": headers, "srcs": srcs}

20
c2_test_defs.bzl Normal file
View File

@ -0,0 +1,20 @@
load("@fbsource//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test")
load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "CXX", "IOS", "MACOSX")
load("@fbsource//xplat/caffe2:c2_defs.bzl", "get_c2_default_cxx_args")
def c2_cxx_test(**kwargs):
    """fb_xplat_cxx_test wrapper applying Caffe2 defaults, minus macOS-only
    arguments the test macro does not accept."""
    args = get_c2_default_cxx_args()
    args.update(kwargs)
    args["fbandroid_use_instrumentation_test"] = True
    unsupported_keys = (
        "macosx_compiler_flags",
        "fbobjc_macosx_configs_override",
        "macosx_frameworks_override",
        "xcode_public_headers_symlinks",
        "macosx_inherited_buck_flags_override",
    )
    for key in unsupported_keys:
        args.pop(key, None)
    args["apple_sdks"] = (IOS, MACOSX)
    args["platforms"] = (CXX, APPLE, ANDROID)
    args["contacts"] = ["oncall+ai_infra_mobile_platform@xmail.facebook.com"]
    fb_xplat_cxx_test(**args)

23
caffe2/BUILD_MODE.bzl Normal file
View File

@ -0,0 +1,23 @@
""" build mode definitions for caffe2/caffe2 """
load("@fbcode//:BUILD_MODE.bzl", get_parent_modes = "all_modes_keep_gpu_sections_all_modes_use_lld")
load("@fbcode_macros//build_defs:create_build_mode.bzl", "extend_build_mode")
def update_mode_struct(name, mode_struct):
    """Return mode_struct, with C++ modules disabled for the "dev" mode."""
    if name != "dev":
        return mode_struct
    return extend_build_mode(
        mode_struct,
        # TODO(ipbrady): Modules introduce floating point inaccuracies (T43879333)
        cxx_modules = False,
    )
# Mode name -> (possibly customized) mode struct, derived from the parent
# fbcode build modes.
_modes = {
    mode_name: update_mode_struct(mode_name, mode_struct)
    for mode_name, mode_struct in get_parent_modes().items()
}

def get_modes():
    """ Return modes for this file """
    return _modes

89
caffe2/defs.bzl Normal file
View File

@ -0,0 +1,89 @@
# useful command for debugging which files are included:
# buck targets caffe2/caffe2: --json | jq -r "map(select(.srcs)) | map({key: .name, value: .srcs | sort}) | from_entries"
load("@fbsource//tools/build_defs:type_defs.bzl", "is_list")
load("//tools/build/buck:flags.bzl", "get_flags")
# Build-flag struct (USE_SSE_ONLY, USE_AVX2, USE_AVX512, ...) for this build.
flags = get_flags()

# Glob roots (relative to caffe2/) used to assemble the default source list.
# NOTE(review): "observers/*" appears twice below; presumably harmless if glob
# results are deduplicated -- confirm before removing.
_BASE_PATHS = (
    "core/*",
    "core/boxing/*",
    "core/boxing/impl/*",
    "core/dispatch/*",
    "core/op_registration/*",
    "cuda_rtc/*",
    "db/*",
    "experiments/operators/*",
    "ideep/**/*",
    "observers/*",
    "onnx/**/*",
    "operators/**/*",
    "observers/*",
    "predictor/*",
    "queue/*",
    "sgd/*",
    "share/contrib/zstd/*",
    "transforms/*",
    "utils/**/*",
)
# Glob roots for SGX builds: like _BASE_PATHS but without ideep and with
# serialize/ added.
# NOTE(review): "observers/*" appears twice here as well -- confirm before
# removing.
_BASE_SGX_PATHS = (
    "core/*",
    "core/boxing/*",
    "core/boxing/impl/*",
    "core/dispatch/*",
    "core/op_registration/*",
    "cuda_rtc/*",
    "db/*",
    "experiments/operators/*",
    "observers/*",
    "onnx/**/*",
    "operators/**/*",
    "observers/*",
    "predictor/*",
    "queue/*",
    "sgd/*",
    "serialize/*",
    "share/contrib/zstd/*",
    "transforms/*",
    "utils/**/*",
)
def get_sgx_patterns(ext):
    """Expand the SGX base paths with the given extension(s)."""
    extensions = ext if is_list(ext) else [ext]
    return [path + e for path in _BASE_SGX_PATHS for e in extensions]
def get_patterns(ext):
    """Expand the default base paths with the given extension(s)."""
    extensions = ext if is_list(ext) else [ext]
    return [path + e for path in _BASE_PATHS for e in extensions]
def get_simd_preprocessor_flags():
    """Preprocessor flags shared by every SIMD compilation variant."""
    return ["-DUSE_FBGEMM"]
def get_simd_compiler_flags():
    """Compiler flags selecting the SIMD ISA level from the build flags."""
    if flags.USE_SSE_ONLY:
        return ["-mno-avx"]

    simd_flags = ["-mavx"] + get_simd_preprocessor_flags()

    # Every uarch with AVX512 support has AVX2 support
    if flags.USE_AVX2 or flags.USE_AVX512:
        simd_flags += ["-mavx2", "-mfma"]
    if flags.USE_AVX512:
        simd_flags += ["-mavx512f", "-mavx512dq", "-mavx512vl"]
    return simd_flags

149
caffe2/defs_hip.bzl Normal file
View File

@ -0,0 +1,149 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load(
"//caffe2:defs_hip.bzl",
"caffe2_includes",
"caffe2_video_image_includes",
"get_hip_file_path",
)
# Extensions treated as GPU translation units when globbing sources.
gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"]
# Extensions treated as GPU headers when globbing headers.
gpu_header_extensions = [".cuh", ".h", ".hpp"]
def is_caffe2_gpu_file(filepath):
    """Decide whether a caffe2 file is a CUDA file that should be hipified."""
    # those files are needed since they define placeholders
    if "/native/cudnn/" in filepath:
        return True

    # files that are already compatible with hip
    if "/hip/" in filepath:
        return False

    # exclude all cudnn and nvrtc implementations except for nvrtc_stub
    if "/nvrtc_stub/" in filepath:
        return True
    if any([keyword in filepath for keyword in ("cudnn", "nvrtc", "NVRTC")]):
        return False

    if "/cuda/" in filepath:
        return True

    base_name = paths.basename(filepath)
    _, extension = paths.split_extension(base_name)
    return "gpu" in base_name or extension in (".cu", ".cuh")
def get_caffe2_hip_srcs(
        include_patterns = caffe2_includes,
        include_files = [],
        project_dir = "caffe2"):
    """Collect CUDA sources and their hipified output paths.

    Returns a pair (cuda_srcs, hipified_paths), where the second list holds
    the hipify output locations prefixed with `project_dir`.
    """
    candidates = native.glob([
        pattern + ext
        for pattern in include_patterns
        for ext in gpu_file_extensions
    ]) + include_files
    gpu_files = []
    hip_files = []
    for src in candidates:
        # Skip test files and anything that is not a CUDA file.
        if "_test" in paths.basename(src) or not is_caffe2_gpu_file(src):
            continue
        gpu_files.append(src)
        hip_files.append(get_hip_file_path(src, is_caffe2 = True))
    # Pre-existing native .hip sources: same patterns with a hip/ suffix swap.
    existing_hip = native.glob([
        pattern[:-1] + "hip/*.hip"
        for pattern in include_patterns
    ])
    gpu_files += existing_hip
    hip_files += existing_hip
    # The hipify script runs under the caffe2 folder, so the outputs must be
    # prefixed with the project dir for buck to find the hipified files.
    return gpu_files, [paths.join(project_dir, f) for f in hip_files]
def get_caffe2_hip_headers(
        include_patterns = caffe2_includes,
        include_files = [],
        project_dir = "caffe2"):
    """Collect CUDA headers and their hipified output paths.

    Returns a pair (cuda_headers, hipified_paths); paths in the second list
    are prefixed with `project_dir`.
    """
    candidates = native.glob([
        pattern + ext
        for pattern in include_patterns
        for ext in gpu_header_extensions
    ]) + include_files
    header_files = []
    hip_headers = []
    for hdr in candidates:
        # Headers the caller listed explicitly are always kept; otherwise
        # skip test files and non-GPU headers.
        if hdr not in include_files and ("_test" in paths.basename(hdr) or not is_caffe2_gpu_file(hdr)):
            continue
        header_files.append(hdr)
        hip_headers.append(get_hip_file_path(hdr, is_caffe2 = True))
    # The hipify script runs under the caffe2 folder, so the outputs must be
    # prefixed with the project dir for buck to find the hipified files.
    return header_files, [paths.join(project_dir, h) for h in hip_headers]
def get_caffe2_hip_video_image_srcs():
    """CUDA sources + hipified paths for the image/ and video/ subtrees."""
    return get_caffe2_hip_srcs(include_patterns = caffe2_video_image_includes)
def get_caffe2_hip_video_image_headers():
    """CUDA headers + hipified paths for the image/ and video/ subtrees."""
    return get_caffe2_hip_headers(include_patterns = caffe2_video_image_includes)
def get_caffe2_hip_test_files():
    """Collect GPU test sources and their hipified output paths."""
    test_globs = [
        "**/*_gpu_test.cc",
    ]
    # Let's ignore the mpi test and fb-internal tests for now.
    test_excludes = [
        "mpi/mpi_gpu_test.cc",
        # "operators/roi_align_op_gpu_test.cc",
        "**/fb/**/*_gpu_test.cc",
    ]
    test_files = []
    hip_test_files = []
    for src in native.glob(test_globs, exclude = test_excludes):
        if not is_caffe2_gpu_file(src):
            continue
        test_files.append(src)
        hip_test_files.append(get_hip_file_path(src, is_caffe2 = True))
    # The hipify script runs under the caffe2 folder, so prefix the outputs
    # with "caffe2" for buck to find the hipified files.
    return test_files, [paths.join("caffe2", f) for f in hip_test_files]

89
defs.bzl Normal file
View File

@ -0,0 +1,89 @@
def get_sleef_deps():
    """sleef dependency tuple; omitted when building for aarch64."""
    if host_info().arch.is_aarch64:
        return []
    return [("sleef", None, "sleef")]
def get_blas_gomp_deps():
    """BLAS + OpenMP dependencies for the host architecture.

    x86_64 uses the MKL flavor from buckconfig (default mkl_lp64_omp);
    aarch64 uses OpenBLAS + omp. Any other architecture fails the build.
    """
    arch = host_info().arch
    if arch.is_x86_64:
        mkl_flavor = native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp")
        return [("IntelComposerXE", None, mkl_flavor)]
    if arch.is_aarch64:
        return [
            ("OpenBLAS", None, "OpenBLAS"),
            ("openmp", None, "omp"),
        ]
    fail("Unsupported architecture")
# Baseline compiler flags applied to fbcode caffe2/ATen targets: warning
# suppressions plus unconditional feature macros (mmap/shm, FBGEMM, QNNPACK,
# XNNPACK except on Windows). "-O1" is appended for dev-nosan test builds.
default_compiler_flags = [
    "-Wall",
    "-Wextra",
    "-Wno-unused-function",
    "-Wno-unused-parameter",
    "-Wno-error=strict-aliasing",
    "-Wno-unused-local-typedefs",
    "-Wno-shadow-compatible-local",
    "-Wno-maybe-uninitialized",  # aten is built with gcc as part of HHVM
    "-Wno-unknown-pragmas",
    "-Wno-strict-overflow",
    # See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/
    # These trigger on platform007
    "-Wno-stringop-overflow",
    "-Wno-class-memaccess",
    "-DHAVE_MMAP",
    "-DUSE_GCC_ATOMICS=1",
    "-D_FILE_OFFSET_BITS=64",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DTH_HAVE_THREAD",
    "-DCPU_CAPABILITY_DEFAULT",
    "-DTH_INDEX_BASE=0",
    "-DMAGMA_V2",
    "-DNO_CUDNN_DESTROY_HANDLE",
    "-DUSE_FBGEMM",
    "-DUSE_QNNPACK",
    "-DUSE_PYTORCH_QNNPACK",
    # The dynamically loaded NVRTC trick doesn't work in fbcode,
    # and it's not necessary anyway, because we have a stub
    # nvrtc library which we load canonically anyway
    "-DUSE_DIRECT_NVRTC",
    "-DUSE_RUY_QMATMUL",
] + ([] if native.host_info().os.is_windows else [
    # XNNPACK depends on an updated version of pthreadpool interface, whose implementation
    # includes <pthread.h> - a header not available on Windows.
    "-DUSE_XNNPACK",
]) + (["-O1"] if native.read_config("fbcode", "build_mode_test_label", "") == "dev-nosan" else [])
# Extra warning suppressions keyed by compiler family, merged into the
# baseline flags by the consuming build rules.
compiler_specific_flags = {
    "clang": [
        "-Wno-absolute-value",
        "-Wno-pass-failed",
        "-Wno-braced-scalar-init",
    ],
    "gcc": [
        "-Wno-error=array-bounds",
    ],
}
def get_cpu_parallel_backend_flags():
    """Preprocessor defines selecting ATen's intra-op parallel backend.

    The backend comes from buckconfig `pytorch.parallel_backend` (default
    "openmp"); unknown values fail the build. Additional defines are added
    for the experimental single thread pool and sequential MKL.
    """
    backend = native.read_config("pytorch", "parallel_backend", "openmp")
    backend_defines = {
        "native": "-DAT_PARALLEL_NATIVE_FBCODE=1",
        "openmp": "-DAT_PARALLEL_OPENMP_FBCODE=1",
        "tbb": "-DAT_PARALLEL_NATIVE_TBB_FBCODE=1",
    }
    if backend not in backend_defines:
        fail("Unsupported parallel backend: " + backend)
    defs = [backend_defines[backend]]
    if native.read_config("pytorch", "exp_single_thread_pool", "0") == "1":
        defs.append("-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")
    if native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp") == "mkl_lp64_seq":
        defs.append("-DATEN_MKL_SEQUENTIAL_FBCODE=1")
    return defs
def is_cpu_static_dispatch_build():
    """True when buckconfig selects CPU static dispatch for caffe2."""
    return native.read_config("fbcode", "caffe2_static_dispatch_mode", "none") == "cpu"

166
defs_gpu.bzl Normal file
View File

@ -0,0 +1,166 @@
load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule")
load(
"//caffe2/caffe2:defs_hip.bzl",
"get_caffe2_hip_headers",
"get_caffe2_hip_srcs",
)
load(":ufunc_defs.bzl", "aten_ufunc_names")
# Glob patterns over the ATen tree, grouped by backend (CUDA runtime,
# cuDNN, MIOpen) and by file kind (headers / .cpp / .cu kernels). These are
# consumed by the get_aten_* helpers below.
ATEN_CUDA_H_PATTERN = [
    "aten/src/ATen/cuda/*.h",
    "aten/src/ATen/cuda/detail/*.h",
    "aten/src/ATen/cuda/nvrtc_stub/*.h",
    "aten/src/ATen/cuda/*.cuh",
    "aten/src/ATen/cuda/detail/*.cuh",
]
ATEN_CUDA_CPP_PATTERN = [
    "aten/src/ATen/cuda/*.cpp",
    "aten/src/ATen/cuda/detail/*.cpp",
    "aten/src/ATen/cuda/nvrtc_stub/*.cpp",
]
ATEN_CUDA_CU_PATTERN = [
    "aten/src/ATen/cuda/*.cu",
    "aten/src/ATen/cuda/detail/*.cu",
]
ATEN_CUDNN_H_PATTERN = [
    "aten/src/ATen/cudnn/*.h",
    "aten/src/ATen/cudnn/*.cuh",
]
ATEN_CUDNN_CPP_PATTERN = ["aten/src/ATen/cudnn/*.cpp"]
ATEN_MIOPEN_H_PATTERN = [
    "aten/src/ATen/miopen/*.h",
    "aten/src/ATen/miopen/*.cuh",
]
ATEN_MIOPEN_CPP_PATTERN = ["aten/src/ATen/miopen/*.cpp"]
ATEN_NATIVE_CUDNN_CPP_PATTERN = ["aten/src/ATen/native/cudnn/*.cpp"]
ATEN_NATIVE_MIOPEN_CPP_PATTERN = ["aten/src/ATen/native/miopen/*.cpp"]
# Native CUDA kernels (.cu) across the dense/nested/quantized/sparse/
# transformers subtrees.
ATEN_NATIVE_CUDA_CU_PATTERN = [
    "aten/src/ATen/native/cuda/*.cu",
    "aten/src/ATen/native/nested/cuda/*.cu",
    "aten/src/ATen/native/quantized/cuda/*.cu",
    "aten/src/ATen/native/sparse/cuda/*.cu",
    "aten/src/ATen/native/transformers/**/*.cu",
]
ATEN_NATIVE_CUDA_CPP_PATTERN = [
    "aten/src/ATen/native/cuda/*.cpp",
    "aten/src/ATen/native/cuda/linalg/*.cpp",
    "aten/src/ATen/native/nested/cuda/*.cpp",
    "aten/src/ATen/native/sparse/cuda/*.cpp",
    "aten/src/ATen/native/transformers/cuda/*.cpp",
]
ATEN_NATIVE_CUDA_H_PATTERN = [
    "aten/src/ATen/native/cudnn/**/*.h",
    "aten/src/ATen/native/cuda/**/*.h",
    "aten/src/ATen/native/cuda/**/*.cuh",
    "aten/src/ATen/native/sparse/cuda/*.h",
    "aten/src/ATen/native/sparse/cuda/*.cuh",
    "aten/src/ATen/native/quantized/cuda/*.h",
    "aten/src/ATen/native/transformers/cuda/*.h",
    "aten/src/ATen/native/transformers/**/*.cuh",
]
# T66678203: Clang CUDA rollout
# Files compiled with clang-cuda instead of nvcc; excluded from the nvcc
# source set in get_aten_cuda_srcs().
ATEN_CUDA_CLANG_CU_PATTERN = [
    "aten/src/ATen/native/cuda/DistributionBernoulli.cu",
]
### Cuda Files
def get_aten_cuda_headers():
    """All CUDA-related ATen headers: core CUDA, native CUDA, and cuDNN."""
    return (
        native.glob(ATEN_CUDA_H_PATTERN) +
        native.glob(ATEN_NATIVE_CUDA_H_PATTERN) +
        native.glob(ATEN_CUDNN_H_PATTERN)
    )
def get_aten_cuda_srcs():
    """CUDA kernel sources, minus the files built with clang-cuda."""
    runtime_cu = native.glob(ATEN_CUDA_CU_PATTERN)
    native_cu = native.glob(
        ATEN_NATIVE_CUDA_CU_PATTERN,
        exclude = ATEN_CUDA_CLANG_CU_PATTERN,
    )
    return runtime_cu + native_cu
def get_aten_cuda_clang_srcs():
    """CUDA sources compiled with clang-cuda (see T66678203 rollout note)."""
    return native.glob(ATEN_CUDA_CLANG_CU_PATTERN)
# CPU+CUDA file
# Note that these sources and headers include the CPU lists too
def get_all_cuda_srcs():
    """Every CUDA-related source: CPU-side .cpp wrappers plus the .cu kernels.

    Note: includes the CPU-side lists too (cudnn/miopen .cpp files).
    """
    cpp_pattern_groups = [
        ATEN_NATIVE_CUDNN_CPP_PATTERN,
        ATEN_CUDNN_CPP_PATTERN,
        ATEN_NATIVE_MIOPEN_CPP_PATTERN,
        ATEN_CUDA_CPP_PATTERN,
        ATEN_NATIVE_CUDA_CPP_PATTERN,
    ]
    srcs = []
    for patterns in cpp_pattern_groups:
        srcs += native.glob(patterns)
    return srcs + get_aten_cuda_srcs()
### HIP files
# Files that must be hipified
def get_aten_hip_srcs():
    """ATen sources that must be hipified.

    Returns (cuda_files, hipified_paths); the hipified paths have the
    "aten/src/" prefix stripped.
    """
    # HIP does not use clang for ATEN_CUDA_CLANG_CU_PATTERN, so no exclusion.
    srcs = (
        native.glob(ATEN_CUDA_CU_PATTERN) +
        native.glob(ATEN_NATIVE_CUDA_CU_PATTERN) +
        native.glob(ATEN_NATIVE_CUDNN_CPP_PATTERN) +
        native.glob(ATEN_CUDNN_CPP_PATTERN) +
        native.glob(ATEN_CUDA_CPP_PATTERN) +
        native.glob(ATEN_NATIVE_CUDA_CPP_PATTERN)
    )
    # Get hipified file names (before, after).
    originals, hipified = get_caffe2_hip_srcs(include_patterns = [], include_files = srcs, project_dir = "")
    return (originals, [f.replace("aten/src/", "") for f in hipified])
def get_aten_hip_headers():
    """ATen headers that must be hipified.

    Returns (cuda_headers, hipified_paths); the hipified paths have the
    "aten/src/" prefix stripped.
    """
    ATEN_CUDA_H = native.glob(ATEN_CUDA_H_PATTERN)
    ATEN_NATIVE_CUDA_H = native.glob(ATEN_NATIVE_CUDA_H_PATTERN)
    ATEN_CUDNN_H = []  # native.glob(ATEN_CUDNN_H_PATTERN)

    # Get hipified file names (before, after). Fix: pass the precomputed
    # `srcs` instead of repeating the concatenation (it was a dead local).
    srcs = ATEN_CUDA_H + ATEN_NATIVE_CUDA_H + ATEN_CUDNN_H
    ret = get_caffe2_hip_headers(include_patterns = [], include_files = srcs, project_dir = "")
    return ret[0], [f.replace("aten/src/", "") for f in ret[1]]
# Native HIP-aware files
def get_aten_hip_native_srcs():
    """HIP-aware sources that need no hipify (hip/impl plus MIOpen)."""
    return (
        native.glob(["aten/src/ATen/hip/impl/*.cpp"]) +
        native.glob(ATEN_MIOPEN_CPP_PATTERN) +
        native.glob(ATEN_NATIVE_MIOPEN_CPP_PATTERN)
    )
def get_aten_hip_native_headers():
    """Headers for the natively HIP-aware sources (hip/impl plus MIOpen)."""
    return native.glob(["aten/src/ATen/hip/impl/*.h"]) + native.glob(ATEN_MIOPEN_H_PATTERN)
def get_aten_hip_ufunc_generated_cuda_sources(gencode_pattern = "{}"):
    """Rename generated ufunc CUDA files from .cu to .hip via genrules.

    Contents of these CUDA files do not need to be hipified at this point,
    but they must be renamed from ".cu" to ".hip" because, unlike OSS, a
    compiler is selected based on a file extension. Returns the list of
    rename-rule targets.
    """
    renamed_targets = []
    for ufunc in aten_ufunc_names:
        cuda_name = "UfuncCUDA_{}.cu".format(ufunc)
        rule_name = "aten_ufunc_hip_renamed_{}".format(ufunc)
        buck_genrule(
            name = rule_name,
            srcs = [gencode_pattern.format(cuda_name)],
            bash = 'cp "$SRCDIR/{}" "$OUT"'.format(cuda_name),
            out = "UfuncCUDA_{}.hip".format(ufunc),
            default_outs = [],
        )
        renamed_targets.append(":" + rule_name)
    return renamed_targets

136
defs_hip.bzl Normal file
View File

@ -0,0 +1,136 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@fbcode//tools/build/buck:rocm_flags.bzl", "get_rocm_arch_args")
# Glob pattern bases (relative to caffe2/) fed to the hipify helpers in
# caffe2/defs_hip.bzl.
caffe2_includes = [
    "operators/**/*",
    "operators/*",
    "sgd/*",
    "transforms/*",
    # distributed folder is managed by its own TARGETS file
    # "distributed/*",
    "queue/*",
    # "binaries/*",
    "**/*_test*",
    "core/*",
    "db/*",
    "utils/**/*",
]
caffe2_video_image_includes = [
    "image/*",
    "video/*",
]
# PyTorch/ATen directories containing CUDA code subject to hipify.
pytorch_includes = [
    "aten/src/ATen/cuda/*",
    "aten/src/ATen/native/cuda/*",
    "aten/src/ATen/native/cuda/linalg/*",
    "aten/src/ATen/native/cudnn/*",
    "aten/src/ATen/native/nested/cuda/*",
    "aten/src/ATen/native/sparse/cuda/*",
    "aten/src/ATen/native/transformers/cuda/*",
    "aten/src/THC/*",
    "aten/src/ATen/test/*",
    "torch/*",
]
# Extensions of GPU translation units and headers, crossed with the bases
# above when globbing.
gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"]
gpu_header_extensions = [".cuh", ".h", ".hpp"]
# Lazy-loaded ROCm runtime libraries every HIP target links against.
hip_external_deps = [
    ("rocm", None, "amdhip64-lazy"),
    ("rocm", None, "MIOpen-lazy"),
    ("rocm", None, "rccl-lazy"),
    ("rocm", None, "roctracer64-lazy"),
]
# Preprocessor flags common to all HIP compilation.
hip_pp_flags = [
    # HIP 4.4.21432 -> TORCH_HIP_VERSION=404
    "-DTORCH_HIP_VERSION=(FB_HIP_VERSION/100000)",
    # ROCm 4.5.2 -> ROCM_VERSION=40502
    "-DROCM_VERSION=FB_ROCM_VERSION",
    "-DUSE_ROCM=1",
    "-D__HIP_PLATFORM_HCC__=1",
    "-D__HIP_NO_HALF_OPERATORS__=1",
    "-D__HIP_NO_HALF_CONVERSIONS__=1",
    "-DCUDA_HAS_FP16=1",
    "-DCAFFE2_USE_MIOPEN",
    # The c10/cuda/impl/cuda_cmake_macros.h is not generated for the
    # hip build yet.
    "-DC10_HIP_NO_CMAKE_CONFIGURE_FILE",
    # clang with -fopenmp=libgomp (gcc's OpenMP runtime library) produces
    # single threaded code and doesn't define -D_OPENMP by default.
    # clang with -fopenmp or -fopenmp=libomp (llvm's OpenMP runtime library)
    # produces multi-threaded code and defines -D_OPENMP by default.
    #
    # hcc currently don't have llvm openmp runtime project builtin.
    # wrap_hip.py also drops -D_OPENMP if explicitly specified.
    "-U_OPENMP",
]
def get_hip_flags():
    """Compiler flags for HIP builds: warning suppressions + ROCm arch args."""
    warning_flags = [
        "-Wno-error=absolute-value",
        "-Wno-macro-redefined",
        "-Wno-inconsistent-missing-override",
        "-Wno-exceptions",
        "-Wno-shift-count-negative",
        "-Wno-shift-count-overflow",
        "-Wno-duplicate-decl-specifier",
        "-Wno-implicit-int-float-conversion",
        "-Wno-unused-result",
        "-Wno-pass-failed",
        "-Wno-unknown-pragmas",
        "-Wno-cuda-compat",
    ]
    # Caffe2 cannot be compiled with NDEBUG using ROCm 4.5.2.
    # TODO: The issue should be fixed properly.
    return ["-UNDEBUG"] + warning_flags + get_rocm_arch_args()
def get_hip_file_path(filepath, is_caffe2 = False):
    """Map a CUDA file path to its hipified counterpart.

    This function should be in sync with the hipify script in
    third-party/hipify_torch/hipify/hipify_python.py; that is normal Python
    (instead of Starlark), so we cannot simply import from there.

    The general rule of converting file names from cuda to hip is:
      - If there is a directory component named "cuda", replace
        it with "hip", AND
      - If the file name contains "CUDA", replace it with "HIP", AND
      - If NONE of the above occurred, then insert "hip" in the file path
        as the direct parent folder of the file.
    Furthermore, ALWAYS replace '.cu' with '.hip', because those files
    contain CUDA kernels that need to be hipified and compiled with hcc.
    """
    dirname = paths.dirname(filepath)
    stem, ext = paths.split_extension(paths.basename(filepath))
    if ext == ".cu":
        ext = ".hip"
    hip_dir = dirname.replace("cuda", "hip").replace("THC", "THH")
    stem = stem.replace("cuda", "hip").replace("CUDA", "HIP")
    # Special case to handle caffe2/core/THCCachingAllocator.
    if not (is_caffe2 and hip_dir == "core"):
        stem = stem.replace("THC", "THH")
    # If the path did not change (e.g. it contains no "cuda" so we cannot
    # differentiate), insert "hip" as the direct parent folder.
    # Special case for utils/cub_namespace: it is first used and hipified
    # when used from core, so it does not end up in a hip directory.
    if hip_dir == dirname and stem != "cub_namespace":
        hip_dir = paths.join(hip_dir, "hip")
    return paths.join(hip_dir, stem + ext)

10
ios/METADATA.bzl Normal file
View File

@ -0,0 +1,10 @@
# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN
# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL
# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF
# THIS PACKAGE.
# TPMS-GENERATED: b832a8f526016b30c557d8a58fc89d9338a51cff
# Third-party package metadata record (see the TPMS-generated header above).
METADATA = {
    "name": "LibTorch",
    "owner": "ai_infra_mobile_platform",
    "version": "1.11.0",
}

10
ios/TestApp/METADATA.bzl Normal file
View File

@ -0,0 +1,10 @@
# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN
# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL
# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF
# THIS PACKAGE.
# TPMS-GENERATED: ba55575493b7ad21fde900f05f93c501b2715a09
# Third-party package metadata record (see the TPMS-generated header above).
METADATA = {
    "name": "unf_ext",
    "owner": "ai_infra_mobile_platform",
    "version": "0.0.7.6",
}

View File

@ -0,0 +1,83 @@
# @nolint
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library")
load(
"@fbsource//xplat/caffe2:pt_defs.bzl",
"gen_aten_files",
"get_aten_codegen_extra_params",
)
def define_aten_gen():
    """Defines the ATen codegen rules for ovrsource: the gen_aten target,
    wrapper libraries exposing its generated CUDA/Meta headers, and the
    cmake-style ATen Config.h / CUDAConfig.h headers."""
    backends = [
        "CPU",
        "SparseCPU",
        "SparseCsrCPU",
        # "MkldnnCPU",
        "CUDA",
        "SparseCUDA",
        "SparseCsrCUDA",
        "QuantizedCPU",
        "QuantizedCUDA",
        "Meta",
        "ZeroTensor"
    ]
    gen_aten_files(
        name = "gen_aten_ovrsource",
        extra_flags = get_aten_codegen_extra_params(backends),
        visibility = ["PUBLIC"],
    )
    # Re-export individual generated headers under the ATen namespace.
    oxx_static_library(
        name = "ovrsource_aten_generated_cuda_headers",
        header_namespace = "ATen",
        public_generated_headers = {
            "CUDAFunctions.h": ":gen_aten_ovrsource[CUDAFunctions.h]",
            "CUDAFunctions_inl.h": ":gen_aten_ovrsource[CUDAFunctions_inl.h]",
        },
        visibility = ["PUBLIC"],
    )
    oxx_static_library(
        name = "ovrsource_aten_generated_meta_headers",
        header_namespace = "ATen",
        public_generated_headers = {
            "MetaFunctions.h": ":gen_aten_ovrsource[MetaFunctions.h]",
            "MetaFunctions_inl.h": ":gen_aten_ovrsource[MetaFunctions_inl.h]",
        },
        visibility = ["PUBLIC"],
    )
    # Substitute the cmake @-placeholders to produce ATen/Config.h.
    gen_cmake_header(
        src = "aten/src/ATen/Config.h.in",
        defines = [
            ("@AT_MKLDNN_ENABLED@", "0"),
            ("@AT_MKL_ENABLED@", "0"),
            ("@AT_MKL_SEQUENTIAL@", "0"),
            ("@AT_FFTW_ENABLED@", "0"),
            ("@AT_NNPACK_ENABLED@", "0"),
            ("@AT_PARALLEL_OPENMP@", "0"),
            ("@AT_PARALLEL_NATIVE@", "1"),
            ("@AT_PARALLEL_NATIVE_TBB@", "0"),
            ("@AT_POCKETFFT_ENABLED@", "0"),
            ("@CAFFE2_STATIC_LINK_CUDA_INT@", "1"),
            ("@AT_BUILD_WITH_BLAS@", "1"),
            ("@AT_BUILD_WITH_LAPACK@", "1"),
            ("@AT_BLAS_F2C@", "1"),
            ("@AT_BLAS_USE_CBLAS_DOT@", "0")
        ],
        header = "ATen/Config.h",
        prefix = "ovrsource_aten_",
    )
    gen_cmake_header(
        src = "aten/src/ATen/cuda/CUDAConfig.h.in",
        defines = [
            ("@AT_CUDNN_ENABLED@", "1"),
            ("@AT_ROCM_ENABLED@", "0"),
            ("@NVCC_FLAGS_EXTRA@", " "),
            ("@AT_MAGMA_ENABLED@", "0")
        ],
        header = "ATen/cuda/CUDAConfig.h",
        prefix = "ovrsource_aten_",
    )

View File

@ -0,0 +1,87 @@
# @nolint
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library")
load("@fbsource//xplat/caffe2/c10:ovrsource_defs.bzl", "cpu_supported_platforms")
def define_caffe2_perfkernels():
    """Defines the caffe2 perfkernels libraries for ovrsource: one
    per-architecture library per AVX variant (x86_64 only) plus the base
    dispatching library that links them in on x86_64."""
    # One library per SIMD flavor, built from the *_{arch}.cc sources.
    [
        oxx_static_library(
            name = "perfkernels_{}_ovrsource".format(arch),
            srcs = native.glob(["caffe2/perfkernels/*_{}.cc".format(arch)]),
            compatible_with = ["ovr_config//cpu:x86_64"],
            compiler_flags = select({
                "DEFAULT": [],
                "ovr_config//compiler:cl": [
                    "/arch:AVX2",
                    "/w",
                ],
                "ovr_config//compiler:clang": [
                    "-Wno-error",
                    "-mf16c",
                ] + (["-mf16c", "-mavx"] if arch == "avx" else ["-mfma", "-mavx2"] if arch == "avx2" else ["-mavx512f"]),
            }),
            raw_headers = native.glob([
                "caffe2/core/*.h",
                "caffe2/perfkernels/*.h",
                "caffe2/proto/*.h",
                "caffe2/utils/*.h",
            ], exclude = [
                "caffe2/core/macros.h",
            ]),
            reexport_all_header_dependencies = False,
            deps = [
                ":caffe2_proto_ovrsource",
                ":ovrsource_caffe2_macros.h",
                "@fbsource//xplat/caffe2/c10:c10_ovrsource",
            ],
        )
        for arch in ["avx", "avx2", "avx512"]
    ]
    # Base library: all non-AVX sources; pulls in the AVX variants on x86_64.
    oxx_static_library(
        name = "perfkernels_ovrsource",
        srcs = native.glob([
            "caffe2/perfkernels/*.cc",
        ], exclude = [
            "**/*_avx*",
        ]),
        compatible_with = cpu_supported_platforms,
        compiler_flags = select({
            "DEFAULT": [],
            "ovr_config//compiler:cl": [
                "/w",
            ],
            "ovr_config//compiler:clang": [
                "-Wno-macro-redefined",
                "-Wno-shadow",
                "-Wno-undef",
                "-Wno-unused-function",
                "-Wno-unused-local-typedef",
                "-Wno-unused-variable",
            ],
        }),
        public_include_directories = [],
        public_raw_headers = native.glob([
            "caffe2/perfkernels/*.h",
        ]),
        raw_headers = native.glob([
            "caffe2/core/*.h",
            "caffe2/proto/*.h",
            "caffe2/utils/*.h",
        ], exclude = [
            "caffe2/core/macros.h",
        ]),
        reexport_all_header_dependencies = False,
        deps = [
            ":caffe2_proto_ovrsource",
            ":ovrsource_caffe2_macros.h",
            "//third-party/cpuinfo:cpuinfo",
            "@fbsource//xplat/caffe2/c10:c10_ovrsource",
            "//third-party/protobuf:libprotobuf",
        ] + select({
            "DEFAULT": [],
            "ovr_config//cpu:x86_64": [
                ":perfkernels_avx_ovrsource",
                ":perfkernels_avx2_ovrsource",
            ],
        }),
    )

View File

@ -0,0 +1,20 @@
# @nolint
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library", "oxx_test")
load("//arvr/tools/build_defs:oxx_python.bzl", "oxx_python_binary", "oxx_python_library")
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
load("//arvr/tools/build_defs:protobuf.bzl", "proto_cxx_library")
load("@bazel_skylib//lib:paths.bzl", "paths")
def define_caffe2_proto():
    """Defines the C++ library generated from the caffe2 protobuf schemas."""
    proto_cxx_library(
        name = "caffe2_proto_ovrsource",
        protos = [
            "caffe2/proto/caffe2.proto",
            "caffe2/proto/caffe2_legacy.proto",
            "caffe2/proto/hsm.proto",
            "caffe2/proto/metanet.proto",
            "caffe2/proto/predictor_consts.proto",
            "caffe2/proto/prof_dag.proto",
            "caffe2/proto/torch.proto",
        ],
    )

View File

@ -0,0 +1,101 @@
# @nolint
load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library", "oxx_test")
load("//arvr/tools/build_defs:oxx_python.bzl", "oxx_python_binary", "oxx_python_library")
load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header")
load("@bazel_skylib//lib:paths.bzl", "paths")
def define_nomnigraph():
    """Defines nomnigraph for ovrsource: the op_gen python codegen (binary,
    library, and genrule driving it), the nomnigraph C++ library, and one
    oxx_test per test source."""
    # Python binary that generates nomnigraph op code from ops.def.
    oxx_python_binary(
        name = "nomnigraph_gen_py_ovrsource",
        main_module = "caffe2.core.nomnigraph.op_gen",
        deps = [":nomnigraph_gen_py_main_ovrsource"],
    )
    oxx_python_library(
        name = "nomnigraph_gen_py_main_ovrsource",
        srcs = native.glob(["caffe2/core/nomnigraph/*.py"]),
        base_module = "",
    )
    nomnigraph_gen_py_cmd = " ".join([
        "--install_dir=$OUT",
        "--source_def=caffe2/core/nomnigraph/ops.def",
        # "--source_def=caffe2/core/nomnigraph/fb/ops.def",
    ])
    # Runs the codegen binary; outputs land in the "gen" directory.
    native.genrule(
        name = "nomnigraph_gen_ovrsource",
        srcs = [
            # "caffe2/core/nomnigraph/fb/ops.def",
            "caffe2/core/nomnigraph/op_gen.py",
            "caffe2/core/nomnigraph/ops.def",
        ],
        cmd_exe = "mkdir $OUT && $(exe :nomnigraph_gen_py_ovrsource) " + nomnigraph_gen_py_cmd,
        out = "gen",
    )
    TEST_SRCS = native.glob([
        "caffe2/core/nomnigraph/tests/*.cc",
    ], exclude = [
        "caffe2/core/nomnigraph/tests/GraphTest.cc",  # fails because debug iterator check
    ])
    oxx_static_library(
        name = "nomnigraph_ovrsource",
        srcs = [
            "caffe2/core/nomnigraph/Representations/NeuralNet.cc",
        ],
        compiler_flags = select({
            "ovr_config//compiler:clang": [
                "-Wno-undef",
                "-Wno-shadow",
                "-Wno-macro-redefined",
                "-Wno-unused-variable",
                "-Wno-unused-local-typedef",
                "-Wno-unused-function",
            ],
            "DEFAULT": [],
        }),
        public_include_directories = ["caffe2/core/nomnigraph/include"],
        public_raw_headers = native.glob([
            "caffe2/core/nomnigraph/include/**/*.h",
        ]),
        raw_headers = ["caffe2/core/common.h"],
        reexport_all_header_dependencies = False,
        # One test target per test source; names derived below.
        tests = [
            ":" + paths.basename(filename)[:-len(".cc")] + "_ovrsource"
            for filename in TEST_SRCS
        ],
        deps = [
            ":ovrsource_caffe2_macros.h",
            "@fbsource//xplat/caffe2/c10:c10_ovrsource",
        ],
    )
    [
        oxx_test(
            name = paths.basename(filename)[:-len(".cc")] + "_ovrsource",
            srcs = [
                filename,
                "caffe2/core/nomnigraph/tests/test_util.cc",
            ],
            compiler_flags = select({
                "ovr_config//compiler:clang": [
                    "-Wno-macro-redefined",
                    "-Wno-shadow",
                    "-Wno-undef",
                    "-Wno-unused-variable",
                ],
                "DEFAULT": [],
            }),
            framework = "gtest",
            oncall = "frl_gemini",
            raw_headers = native.glob([
                "caffe2/core/nomnigraph/tests/*.h",
            ]),
            deps = [
                ":nomnigraph_ovrsource",
            ],
        )
        for filename in TEST_SRCS
    ]

239
pt_template_srcs.bzl Normal file
View File

@ -0,0 +1,239 @@
# This file keeps a list of PyTorch source files that are used for templated selective build.
# NB: as this is PyTorch Edge selective build, we assume only CPU targets are
# being built
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode")
load(":build_variables.bzl", "aten_native_source_list")
load(
":ufunc_defs.bzl",
"aten_ufunc_generated_cpu_kernel_sources",
"aten_ufunc_generated_cpu_sources",
)
# Files in this list are supposed to be built separately for each app,
# for different operator allow lists.
# aten_native_source_list is loaded from build_variables.bzl and carries the
# bulk of the operator definition sources.
TEMPLATE_SOURCE_LIST = [
    "torch/csrc/jit/runtime/register_prim_ops.cpp",
    "torch/csrc/jit/runtime/register_special_ops.cpp",
] + aten_native_source_list
# For selective build, we can lump the CPU and CPU kernel sources altogether
# because there is only ever one vectorization variant that is compiled
def aten_ufunc_generated_all_cpu_sources(gencode_pattern = "{}"):
    """All generated CPU ufunc sources (dispatch plus kernel files).

    For selective build the CPU and CPU-kernel sources can be lumped
    together because only one vectorization variant is compiled.
    """
    dispatch_srcs = aten_ufunc_generated_cpu_sources(gencode_pattern)
    kernel_srcs = aten_ufunc_generated_cpu_kernel_sources(gencode_pattern)
    return dispatch_srcs + kernel_srcs
# Templated-selective-build source lists for optional op packs.
TEMPLATE_MASKRCNN_SOURCE_LIST = [
    "register_maskrcnn_ops.cpp",
]
TEMPLATE_BATCH_BOX_COX_SOURCE_LIST = [
    "register_batch_box_cox_ops.cpp",
]
# Metal (Apple GPU) backend sources; .mm files are Objective-C++.
METAL_SOURCE_LIST = [
    "aten/src/ATen/native/metal/MetalAten.mm",
    "aten/src/ATen/native/metal/MetalGuardImpl.cpp",
    "aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp",
    "aten/src/ATen/native/metal/MetalCommandBuffer.mm",
    "aten/src/ATen/native/metal/MetalContext.mm",
    "aten/src/ATen/native/metal/MetalConvParams.mm",
    "aten/src/ATen/native/metal/MetalTensorImplStorage.mm",
    "aten/src/ATen/native/metal/MetalTensorUtils.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNClampOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNConvOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNFullyConnectedOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNNeuronOp.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSCNNUtils.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSImage+Tensor.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSImageUtils.mm",
    "aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.mm",
    "aten/src/ATen/native/metal/ops/MetalAddmm.mm",
    "aten/src/ATen/native/metal/ops/MetalBinaryElementwise.mm",
    "aten/src/ATen/native/metal/ops/MetalChunk.mm",
    "aten/src/ATen/native/metal/ops/MetalClamp.mm",
    "aten/src/ATen/native/metal/ops/MetalConcat.mm",
    "aten/src/ATen/native/metal/ops/MetalConvolution.mm",
    "aten/src/ATen/native/metal/ops/MetalCopy.mm",
    "aten/src/ATen/native/metal/ops/MetalHardswish.mm",
    "aten/src/ATen/native/metal/ops/MetalLeakyReLU.mm",
    "aten/src/ATen/native/metal/ops/MetalNeurons.mm",
    "aten/src/ATen/native/metal/ops/MetalPadding.mm",
    "aten/src/ATen/native/metal/ops/MetalPooling.mm",
    "aten/src/ATen/native/metal/ops/MetalReduce.mm",
    "aten/src/ATen/native/metal/ops/MetalReshape.mm",
    "aten/src/ATen/native/metal/ops/MetalSoftmax.mm",
    "aten/src/ATen/native/metal/ops/MetalTranspose.mm",
    "aten/src/ATen/native/metal/ops/MetalUpsamplingNearest.mm",
]
UNET_METAL_PREPACK_SOURCE_LIST = [
    "unet_metal_prepack.cpp",
    "unet_metal_prepack.mm",
]
METAL_MASKRCNN_SOURCE_LIST = [
    "maskrcnn/srcs/GenerateProposals.mm",
    "maskrcnn/srcs/RoIAlign.mm",
]
# The get_template_source_dict() returns a dict containing a path prefix
# and a list of .cpp source files containing operator definitions and
# registrations that should get selected via templated selective build.
# The file selected_mobile_ops.h has the list of selected top level
# operators.
# NB: doesn't include generated files; copy_template_registration_files
# handles those specially
def get_template_source_dict():
    """Group TEMPLATE_SOURCE_LIST by directory: {dir_prefix: [files]}."""
    grouped = {}
    for src in TEMPLATE_SOURCE_LIST:
        grouped.setdefault(paths.dirname(src), []).append(src)
    return grouped
def get_gen_oplist_outs():
    """Output map for the gen_oplist rule: each artifact maps to itself."""
    artifacts = [
        "SupportedMobileModelsRegistration.cpp",
        "selected_mobile_ops.h",
        "selected_operators.yaml",
    ]
    return {name: [name] for name in artifacts}
def get_generate_code_bin_outs():
    """Output map for the generate_code binary.

    Every generated autograd artifact maps to itself; the python binding
    sources are included only in arvr mode.
    """
    generated = [
        "autograd/generated/ADInplaceOrViewTypeEverything.cpp",
        "autograd/generated/ADInplaceOrViewType_0.cpp",
        "autograd/generated/ADInplaceOrViewType_1.cpp",
        "autograd/generated/Functions.cpp",
        "autograd/generated/Functions.h",
        "autograd/generated/TraceTypeEverything.cpp",
        "autograd/generated/TraceType_0.cpp",
        "autograd/generated/TraceType_1.cpp",
        "autograd/generated/TraceType_2.cpp",
        "autograd/generated/TraceType_3.cpp",
        "autograd/generated/TraceType_4.cpp",
        "autograd/generated/VariableType.h",
        "autograd/generated/VariableTypeEverything.cpp",
        "autograd/generated/VariableType_0.cpp",
        "autograd/generated/VariableType_1.cpp",
        "autograd/generated/VariableType_2.cpp",
        "autograd/generated/VariableType_3.cpp",
        "autograd/generated/VariableType_4.cpp",
        "autograd/generated/variable_factories.h",
    ]
    if is_arvr_mode():
        generated += [
            "autograd/generated/python_fft_functions.cpp",
            "autograd/generated/python_functions.h",
            "autograd/generated/python_functions_0.cpp",
            "autograd/generated/python_functions_1.cpp",
            "autograd/generated/python_functions_2.cpp",
            "autograd/generated/python_functions_3.cpp",
            "autograd/generated/python_functions_4.cpp",
            "autograd/generated/python_linalg_functions.cpp",
            "autograd/generated/python_nn_functions.cpp",
            "autograd/generated/python_return_types.cpp",
            "autograd/generated/python_sparse_functions.cpp",
            "autograd/generated/python_special_functions.cpp",
            "autograd/generated/python_torch_functions_0.cpp",
            "autograd/generated/python_torch_functions_1.cpp",
            "autograd/generated/python_torch_functions_2.cpp",
            "autograd/generated/python_variable_methods.cpp",
        ]
    return {f: [f] for f in generated}
def get_template_registration_files_outs():
    """Output map for template registration files, plus generated ufunc
    sources (which live under aten/src/ATen/)."""
    outs = {}
    for src in (TEMPLATE_MASKRCNN_SOURCE_LIST +
                TEMPLATE_BATCH_BOX_COX_SOURCE_LIST +
                TEMPLATE_SOURCE_LIST):
        outs[src] = [src]
    for base_name in aten_ufunc_generated_all_cpu_sources():
        gen_path = "aten/src/ATen/{}".format(base_name)
        outs[gen_path] = [gen_path]
    return outs
def get_template_registration_file_rules(rule_name):
    """Build ':rule[output]' references for every template registration
    file and generated ufunc source."""
    rules = [
        ":{}[{}]".format(rule_name, src)
        for src in TEMPLATE_SOURCE_LIST + TEMPLATE_MASKRCNN_SOURCE_LIST + TEMPLATE_BATCH_BOX_COX_SOURCE_LIST
    ]
    rules += [
        ":{}[aten/src/ATen/{}]".format(rule_name, base_name)
        for base_name in aten_ufunc_generated_all_cpu_sources()
    ]
    return rules
# ---------------------METAL RULES---------------------
def get_metal_source_dict():
    """Group METAL_SOURCE_LIST by directory: {dir_prefix: [files]}."""
    grouped = {}
    for src in METAL_SOURCE_LIST:
        grouped.setdefault(paths.dirname(src), []).append(src)
    return grouped
def get_metal_registration_files_outs():
    """Map every Metal source (core, UNet prepack, Mask R-CNN) to a one-item output list."""
    all_srcs = METAL_SOURCE_LIST + UNET_METAL_PREPACK_SOURCE_LIST + METAL_MASKRCNN_SOURCE_LIST
    return {src: [src] for src in all_srcs}
# There is a really weird issue with the arvr windows builds where
# the custom op files are breaking them. See https://fburl.com/za87443c
# The hack is just to not build them for that platform and pray they aren't needed.
def get_metal_registration_files_outs_windows():
    """Windows variant: core Metal sources only (custom ops break arvr Windows builds)."""
    return {src: [src] for src in METAL_SOURCE_LIST}
def get_metal_registration_files_rules(rule_name):
    """Split Metal registration outputs into Objective-C vs C++ rule references."""
    buckets = {"objc": [], "cxx": []}
    for src in METAL_SOURCE_LIST + METAL_MASKRCNN_SOURCE_LIST + UNET_METAL_PREPACK_SOURCE_LIST:
        ref = ":{}[{}]".format(rule_name, src)
        # Paths containing ".cpp" are compiled as C++; everything else as Obj-C.
        bucket = "cxx" if ".cpp" in src else "objc"
        buckets[bucket].append(ref)
    return buckets
def get_metal_registration_files_rules_windows(rule_name):
    """Windows variant of get_metal_registration_files_rules: core Metal sources only."""
    buckets = {"objc": [], "cxx": []}
    for src in METAL_SOURCE_LIST:
        ref = ":{}[{}]".format(rule_name, src)
        # Paths containing ".cpp" are compiled as C++; everything else as Obj-C.
        bucket = "cxx" if ".cpp" in src else "objc"
        buckets[bucket].append(ref)
    return buckets

112
test/defs.bzl Normal file
View File

@ -0,0 +1,112 @@
load("@fbcode_macros//build_defs:python_pytest.bzl", "python_pytest")
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
load("@fbsource//tools/build_defs/sandcastle:sandcastle_defs.bzl", "is_sandcastle_machine")
def define_python_unittest(pytest = False, **kwargs):
    """Define a python test target with the standard PyTorch fbcode test env.

    Args:
        pytest: emit a python_pytest target instead of python_unittest.
        **kwargs: forwarded to the underlying rule. "env" and "tags" are
            copied before mutation so the caller's dict/list are untouched.

    Fix: use kwargs.get("name", "") so a missing "name" no longer crashes
    the .endswith() check (previously .endswith was called on None).
    """
    build_mode = native.read_config("fbcode", "build_mode_test_label")
    enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))

    # "deepcopy" the 'env: Dict[str, str]' so we never mutate the caller's dict.
    env = dict(kwargs.get("env", {}))
    env.update({
        # indicates we are running in test env.
        "PYTORCH_TEST": "1",
        "PYTORCH_TEST_FBCODE": "1",
        # Sanitizer visibility, so tests can skip/adjust themselves under *SAN.
        "PYTORCH_TEST_WITH_ASAN": "1" if ("asan" in build_mode or build_mode == "dev") else "0",
        "PYTORCH_TEST_WITH_DEV_DBG_ASAN": "1" if (build_mode == "dev" or "dev-asan" in build_mode or "dbg-asan" in build_mode or "dbgo-asan" in build_mode) else "0",
        "PYTORCH_TEST_WITH_TSAN": "1" if ("tsan" in build_mode) else "0",
        "PYTORCH_TEST_WITH_UBSAN": "1" if ("ubsan" in build_mode or build_mode == "dev") else "0",
        "NO_MULTIPROCESSING_SPAWN": "1" if is_sandcastle_machine() else "0",
        "ENABLE_FLATBUFFER": "1" if enable_flatbuffer else "0",
        # To speed up TP tests.
        "TENSORPIPE_TLS_DATACENTER": "test_dc",
    })
    kwargs["env"] = env

    # Run CUDA tests on GPUs via remote execution.
    if kwargs.get("name", "").endswith("cuda"):
        # "deepcopy" the 'tags: List[str]'
        kwargs["tags"] = list(kwargs.get("tags", [])) + [
            "re_opts_capabilities={\"platform\": \"gpu-remote-execution\", \"subplatform\": \"P100\"}",
            "supports_remote_execution",
            "run_as_bundle",
            "tpx:experimental-shard-size-for-bundle=100",
        ]
        env["PYTORCH_TEST_REMOTE_GPU"] = "1"

    if pytest:
        python_pytest(
            **kwargs
        )
    else:
        python_unittest(
            **kwargs
        )
def define_mp_tests(tests, additional_deps = None, pytest = False, **kwargs):
    """Define multiprocessing tests: LSAN disabled, test cases serialized."""
    # LeakSanitizer doesn't work for python multiprocessing.
    # See https://fb.workplace.com/groups/fbcode/posts/2625521060818050/
    # and https://fb.workplace.com/groups/101100140348621/posts/1278688645923092/
    extra_env = {
        "ASAN_OPTIONS": "detect_leaks=0",
        "CUDA_INJECTION64_PATH": "0",  # resolve kineto TSAN flakiness
    }
    # Serialize test cases since multiple tests running on same GPUs can
    # deadlock or there can be port conflicts.
    tags = kwargs.setdefault("tags", [])
    if "serialize_test_cases" not in tags:
        tags.append("serialize_test_cases")
    define_tests(tests, additional_deps, pytest, extra_env, **kwargs)
def define_q_distributed_test(tests, env = None, additional_deps = None, pytest = False, **kwargs):
    """Define a q distributed test, forwarding the caller's env to define_tests.

    Fix: `env` defaults to None but is passed as define_tests' `extra_env`,
    which is .update()'d into the base env — guard with `env or {}` so the
    default no longer crashes.
    """
    define_tests(tests, additional_deps, pytest, env or {}, **kwargs)
def define_tests(tests, additional_deps = None, pytest = False, extra_env = None, **kwargs):
    """Create one test target per entry in `tests`.

    Args:
        tests: dict mapping test name -> list of source files.
        additional_deps: optional dict mapping test name -> extra deps.
        pytest: emit python_pytest targets instead of python_unittest.
        extra_env: optional dict of env vars layered over the defaults.
            (Fix: was a mutable default `{}`; now None, and tolerated.)
    """
    if additional_deps == None:
        additional_deps = {}
    provided_tags = kwargs.pop("tags", [])
    env = {
        "DOCS_SRC_DIR": "$(location //caffe2/docs/source:doc_files)",
        "MKL_NUM_THREADS": "1",
        "OMP_NUM_THREADS": "1",
        "SKIP_TEST_BOTTLENECK": "1",
    }
    env.update(extra_env or {})
    for name, srcs in tests.items():
        # Fresh tag list per target so one target's tags never leak into another.
        tags = list(provided_tags)
        test_deps = ["//caffe2:test-lib"] + additional_deps.get(name, [])
        define_python_unittest(
            pytest,
            name = name,
            srcs = srcs,
            base_module = "",
            compile = "with-source",
            env = env,
            py_version = ">=3.5",
            strip_libpar = True,
            tags = tags,
            deps = test_deps,
            # Depend directly on :libtorch so that tests won't be pruned by the
            # rdep distance heuristic.
            cpp_deps = ["//caffe2:libtorch"],
            runtime_deps = [
                "//caffe2/docs/source:doc_files",
            ],
            **kwargs
        )

39
test/distributed/defs.bzl Normal file
View File

@ -0,0 +1,39 @@
load("@fbsource//tools/build_defs:testpilot_defs.bzl", "special_tags")
load(
"//caffe2/test:defs.bzl",
"define_python_unittest",
)
# These distributed tests need custom environment variables
def define_distributed_test(**kwargs):
    """Define a distributed python unittest with the env vars these tests need.

    Fix: previously indexed kwargs["env"] directly, crashing when no env was
    passed and mutating the caller's dict; now copies it first, consistent
    with define_python_unittest.
    """
    env = dict(kwargs.get("env", {}))
    # LeakSanitizer doesn't work for python multiprocessing.
    # See https://fb.workplace.com/groups/fbcode/posts/2625521060818050/
    # and https://fb.workplace.com/groups/101100140348621/posts/1278688645923092/
    env["ASAN_OPTIONS"] = "detect_leaks=0"
    # Resolve kineto TSAN flakiness
    env["CUDA_INJECTION64_PATH"] = "0"
    kwargs["env"] = env
    define_python_unittest(
        base_module = "",
        main_module = "fb.test_distributed_trap",
        py_version = ">=3.5",
        tags = [special_tags.run_as_bundle],
        deps = [
            "//caffe2:test-lib",
            "//caffe2:torch",
            "//caffe2/torch/fb/rendezvous:zeus",
            "//pytorch/vision:torchvision",
        ],
        external_deps = [
            ("numpy", None),
            ("scipy", None),
        ],
        **kwargs
    )
def define_c10d_distributed_test(srcs, **kwargs):
    """Define a c10d distributed test, adding the test_distributed_trap shim and data files.

    Fix: build a new srcs list instead of `srcs.extend(...)`, which mutated
    the caller's list as a side effect.
    """
    all_srcs = srcs + ["fb/test_distributed_trap.py"] + native.glob(["data/*.py"])
    define_distributed_test(
        srcs = all_srcs,
        **kwargs
    )

View File

@ -0,0 +1,22 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load(
"//caffe2/test:defs.bzl",
"define_mp_tests",
)
def define_fsdp_tests():
    """Discover FSDP test_*.py files and define an MP test (with torchvision) for each."""
    tests = {}
    deps_by_test = {}
    for path in native.glob(["**/test_*.py"]):
        stem = paths.basename(path)
        name = stem.replace("test_", "").replace(".py", "")
        tests[name] = [path]
        deps_by_test[name] = ["//pytorch/vision:torchvision"]
    define_mp_tests(
        tests = tests,
        additional_deps = deps_by_test,
    )

View File

@ -0,0 +1,22 @@
load("@bazel_skylib//lib:paths.bzl", "paths")
load(
"//caffe2/test:defs.bzl",
"define_tests",
)
def define_pipeline_tests():
    """Discover pipeline test_*.py files and define a pytest-based target for each."""
    tests = {
        paths.basename(path).replace("test_", "").replace(".py", ""): [path]
        for path in native.glob(["**/test_*.py"])
    }
    define_tests(
        pytest = True,
        tests = tests,
        external_deps = [("pytest", None)],
        resources = ["conftest.py"],
    )

View File

@ -0,0 +1,31 @@
# Macros for building CUDA code.
def if_cuda(if_true, if_false = []):
    """Shorthand for select()'ing on whether we're building with CUDA.

    Returns a select statement which evaluates to if_true if we're building
    with CUDA enabled. Otherwise, the select statement evaluates to if_false.
    """
    cuda_conditions = (
        "@local_config_cuda//cuda:using_clang",
        "@local_config_cuda//cuda:using_nvcc",
    )
    branches = {condition: if_true for condition in cuda_conditions}
    branches["//conditions:default"] = if_false
    return select(branches)
def cuda_default_copts():
    """Default options for all CUDA compilations.

    Fix: dropped the dead `+ []` no-op that was appended to the flag list.
    """
    # Compile as CUDA and advertise GOOGLE_CUDA to the translation unit.
    return if_cuda(["-x", "cuda", "-DGOOGLE_CUDA=1"])
def cuda_is_configured():
    """Returns true if CUDA was enabled during the configure process."""
    # This build flavor always configures CUDA, so the answer is statically True.
    return True
def if_cuda_is_configured(x):
    """Tests if the CUDA was enabled during the configure process.

    Unlike if_cuda(), this does not require that we are building with
    --config=cuda. Used to allow non-CUDA code to depend on CUDA libraries.
    """
    return x if cuda_is_configured() else []

View File

@ -0,0 +1,12 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
def add_cpuinfo_lib():
    """Define the :cpuinfo library, selecting the SGX-safe cpuinfo build when needed."""
    if is_sgx:
        cpuinfo_dep = "fbsource//third-party/cpuinfo_sgx:cpuinfo_coffeelake"
    else:
        cpuinfo_dep = "fbsource//third-party/cpuinfo:cpuinfo"
    cpp_library(
        name = "cpuinfo",
        exported_deps = [cpuinfo_dep],
    )

View File

@ -0,0 +1,25 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
def add_miniz_lib():
    """Define the :miniz cpp_library wrapping the bundled miniz 2.0.8 (with fb plugins)."""
    cpp_library(
        name = "miniz",
        srcs = [
            "third_party/miniz-2.0.8/fb/FollyCrcPlugin.cpp",
            "third_party/miniz-2.0.8/fb/miniz-fb.c",
        ],
        # Remap headers so callers can include "miniz.h" / "miniz-fb.h" directly.
        headers = {
            "caffe2/third_party/miniz-2.0.8/miniz.c": "third_party/miniz-2.0.8/miniz.c",
            "miniz-fb.h": "third_party/miniz-2.0.8/fb/miniz-fb.h",
            "miniz.h": "third_party/miniz-2.0.8/miniz.h",
        },
        header_namespace = "",
        # -fexceptions is required, otherwise, when we use @mode/opt-clang-thinlto,
        # c functions become noexcept, and we may not be able to catch exceptions
        # during model loading.
        compiler_flags = ["-DUSE_EXTERNAL_MZCRC", "-fexceptions"] + (["-DMINIZ_NO_STDIO"] if is_sgx else []),
        # folly is only required as a dependency if USE_EXTERNAL_MZCRC
        # above is defined, and FollyCrcPlugin.cpp is added.
        # Neither are strictly needed, but run significantly faster.
        exported_deps = ["//folly/hash:checksum"],
    )

View File

@ -0,0 +1,54 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
# True for fbcode dbg build modes (the build_mode string contains "dbg").
is_dbg_build = native.read_config("fbcode", "build_mode", "").find("dbg") != -1
# True when a sanitizer is enabled for this fbcode build.
is_sanitizer = native.read_config("fbcode", "sanitizer", "") != ""
def define_perf_kernels(prefix, levels_and_flags, compiler_common_flags, dependencies, external_deps):
    """Define per-ISA perfkernel libraries plus an umbrella library.

    Args:
        prefix: string prepended to every emitted target name (e.g. "sgx_").
        levels_and_flags: list of (isa_level, extra_flags) pairs; one
            cpp_library is emitted per level from the "**/*_<level>.cc" files.
        compiler_common_flags: flags shared by every emitted library.
        dependencies: exported_deps for each emitted library.
        external_deps: exported_external_deps for the per-level libraries.
    """
    # Vectorization is only enabled in optimized, unsanitized builds.
    vectorize_flags = ([
        # "-Rpass=loop-vectorize", # Add vectorization information to output
        "-DENABLE_VECTORIZATION=1",
        "-fveclib=SVML",
    ] if not is_dbg_build and not is_sanitizer else [])
    compiler_specific_flags = {
        "clang": vectorize_flags,
        "gcc": [],
    }
    compiler_specific_flags["clang"] += ["-Wno-pass-failed"]
    # Common sources exclude the ISA-specific *_avx*.cc files; those are
    # compiled into their own per-level libraries below.
    common_srcs = native.glob(
        ["**/*.cc"],
        exclude = [
            "**/*_avx512.cc",
            "**/*_avx2.cc",
            "**/*_avx.cc",
        ],
    )
    cpp_headers = native.glob(
        ["**/*.h"],
    )
    kernel_targets = []
    for level, flags in levels_and_flags:
        cpp_library(
            name = prefix + "perfkernels_" + level,
            srcs = native.glob(["**/*_" + level + ".cc"]),
            headers = cpp_headers,
            compiler_flags = compiler_common_flags + flags,
            compiler_specific_flags = compiler_specific_flags,
            exported_deps = dependencies,
            exported_external_deps = external_deps,
        )
        kernel_targets.append(":" + prefix + "perfkernels_" + level)
    # Umbrella library: common sources, linked whole, re-exporting every level.
    cpp_library(
        name = prefix + "perfkernels",
        srcs = common_srcs,
        headers = cpp_headers,
        compiler_flags = compiler_common_flags,
        compiler_specific_flags = compiler_specific_flags,
        link_whole = True,
        exported_deps = kernel_targets + dependencies,
    )

9
tools/rules/METADATA.bzl Normal file
View File

@ -0,0 +1,9 @@
# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN
# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL
# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF
# THIS PACKAGE.
# TPMS-GENERATED: b3448f8fd2a893772f944f37627e63917b77dede
METADATA = {
"name": "rules",
"owner": "pytorch_dev_infra",
}

View File

@ -0,0 +1,261 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
load("//caffe2:build.bzl", "GENERATED_CPP")
load("//caffe2:build_variables.bzl", "jit_core_headers", "jit_core_sources")
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
# Compiler flags shared by every ATen/libtorch SGX target defined in this file.
default_compiler_flags = [
    "-Wno-error=strict-aliasing",
    "-Wno-unused-local-typedefs",
    "-Wno-shadow-compatible-local",
    "-Wno-maybe-uninitialized", # aten is built with gcc as part of HHVM
    "-Wno-unknown-pragmas",
    "-Wno-strict-overflow",
    # See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/
    # These trigger on platform007
    "-Wno-stringop-overflow",
    "-Wno-class-memaccess",
    "-DHAVE_MMAP",
    "-DUSE_GCC_ATOMICS=1",
    "-D_FILE_OFFSET_BITS=64",
    "-DHAVE_SHM_OPEN=1",
    "-DHAVE_SHM_UNLINK=1",
    "-DHAVE_MALLOC_USABLE_SIZE=1",
    "-DTH_HAVE_THREAD",
    "-DCPU_CAPABILITY_DEFAULT",
    "-DTH_INDEX_BASE=0",
    "-DMAGMA_V2",
    "-DNO_CUDNN_DESTROY_HANDLE",
    "-DUSE_QNNPACK",
    "-DUSE_PYTORCH_QNNPACK",
    # The dynamically loaded NVRTC trick doesn't work in fbcode,
    # and it's not necessary anyway, because we have a stub
    # nvrtc library which we load canonically anyway
    "-DUSE_DIRECT_NVRTC",
    "-DUSE_XNNPACK",
    "-Wno-error=uninitialized",
]
# Extra warning suppressions applied per compiler on top of the defaults.
compiler_specific_flags = {
    "clang": [
        "-Wno-absolute-value",
        "-Wno-pass-failed",
        "-Wno-braced-scalar-init",
    ],
    "gcc": [
        "-Wno-error=array-bounds",
    ],
}
def add_sgx_aten_libs(ATEN_HEADERS_CPU_MKL, ATEN_SRCS_CPU_MKL, ATEN_CORE_CPP):
    """Define the SGX flavors of the ATen libraries.

    Emits :generate-sgx-config / :generated-sgx-config-header (an ATen
    Config.h with MKL/MKLDNN/LAPACK/FFTW disabled and the native parallel
    backend selected), :ATen-core-sgx-headers, :ATen-sgx-core and
    :ATen-sgx-cpu. No-op unless the build is configured for SGX.

    Args:
        ATEN_HEADERS_CPU_MKL: header list for the :ATen-sgx-cpu library.
        ATEN_SRCS_CPU_MKL: source list for the :ATen-sgx-cpu library.
        ATEN_CORE_CPP: core ATen sources for the :ATen-sgx-core library.
    """
    # we do not need to define these targets if we are not in SGX mode
    if not is_sgx:
        return
    x64_compiler_flags = [
        "-DUSE_SSE2",
        "-DUSE_SSE3",
        "-DUSE_SSE4_1",
        "-DUSE_SSE4_2",
        # don't enable AVX2 because we don't have runtime dispatch
        "-DCPU_CAPABILITY_DEFAULT",
        "-DCPU_CAPABILITY=DEFAULT",
        "-DTH_INDEX_BASE=0",
        # NOTE(review): -DTH_INDEX_BASE=0 is duplicated; harmless but could be deduped.
        "-DTH_INDEX_BASE=0",
        "-msse",
        "-msse2",
        "-msse3",
        "-msse4",
        "-msse4.1",
        "-msse4.2",
        "-mavx",
        "-mavx2",
    ]
    cpu_preprocessor_flags = [
        "-DATEN_MKLDNN_ENABLED_FBCODE=0",
        "-DATEN_NNPACK_ENABLED_FBCODE=0",
        "-DATEN_MKL_ENABLED_FBCODE=0",
        "-DAT_BUILD_WITH_BLAS_FBCODE=1",
        "-DAT_BLAS_USE_CBLAS_DOT_FBCODE=1",
        "-DAT_BLAS_F2C_FBCODE=0",
        "-DATEN_CUDNN_ENABLED_FBCODE=1",
        "-DATEN_ROCM_ENABLED_FBCODE=0",
        "-DC10_MOBILE",
        "-DAT_PARALLEL_NATIVE_FBCODE=1",
    ]
    # Generate ATen/Config.h by textual substitution, mirroring the CMake
    # configure step but with SGX-appropriate feature switches.
    custom_rule(
        name = "generate-sgx-config",
        srcs = [
            "src/ATen/Config.h.in",
        ],
        build_args = " ".join([
            "--input-file",
            "src/ATen/Config.h.in",
            "--output-file",
            "Config.h",
            "--replace",
            "@AT_MKLDNN_ENABLED@",
            "0",
            "--replace",
            "@AT_MKL_ENABLED@",
            "0",
            "--replace",
            "@AT_MKL_SEQUENTIAL@",
            "0",
            "--replace",
            "@AT_FFTW_ENABLED@",
            "0",
            "--replace",
            "@AT_POCKETFFT_ENABLED@",
            "0",
            "--replace",
            "@AT_NNPACK_ENABLED@",
            "ATEN_NNPACK_ENABLED_FBCODE",
            "--replace",
            "@AT_BUILD_WITH_BLAS@",
            "1",
            "--replace",
            "@AT_BUILD_WITH_LAPACK@",
            "0",
            "--replace",
            "@CAFFE2_STATIC_LINK_CUDA_INT@",
            "0",
            "--replace",
            "@AT_BLAS_F2C@",
            "AT_BLAS_F2C_FBCODE",
            "--replace",
            "@AT_BLAS_USE_CBLAS_DOT@",
            "AT_BLAS_USE_CBLAS_DOT_FBCODE",
            "--replace",
            "@AT_PARALLEL_OPENMP@",
            "0",
            "--replace",
            "@AT_PARALLEL_NATIVE@",
            "1",
            "--replace",
            "@AT_PARALLEL_NATIVE_TBB@",
            "0",
        ]),
        build_script_dep = "//caffe2:substitute",
        output_gen_files = ["Config.h"],
    )
    # Expose the generated Config.h under the "ATen" include namespace.
    cpp_library(
        name = "generated-sgx-config-header",
        headers = [":generate-sgx-config=Config.h"],
        header_namespace = "ATen",
    )
    ATEN_CORE_H = native.glob([
        "src/ATen/core/*.h",
        "src/ATen/core/boxing/*.h",
        "src/ATen/core/boxing/impl/*.h",
        "src/ATen/core/dispatch/*.h",
        "src/ATen/core/op_registration/*.h",
    ]) + [
        "src/ATen/CPUGeneratorImpl.h",
        "src/ATen/NumericUtils.h",
    ]
    cpp_library(
        name = "ATen-core-sgx-headers",
        headers = ATEN_CORE_H,
        propagated_pp_flags = [
            "-Icaffe2/aten/src",
        ],
        exported_deps = [
            "//caffe2:generated-aten-headers-core",
            "//caffe2/c10:c10",
        ],
    )
    cpp_library(
        name = "ATen-sgx-core",
        # Sorry, this is duped with GENERATED_CPP_CORE. I was too lazy to refactor
        # the list into a bzl file
        srcs = ATEN_CORE_CPP + [
            ":gen_aten=Operators_0.cpp",
            ":gen_aten=Operators_1.cpp",
            ":gen_aten=Operators_2.cpp",
            ":gen_aten=Operators_3.cpp",
            ":gen_aten=Operators_4.cpp",
            ":gen_aten=core/ATenOpList.cpp",
            ":gen_aten=core/TensorMethods.cpp",
        ],
        headers = native.glob([
            "src/ATen/*.h",
            "src/ATen/ops/*.h",
            "src/ATen/quantized/*.h",
        ]),
        compiler_flags = default_compiler_flags,
        compiler_specific_flags = compiler_specific_flags,
        link_whole = True,
        # Tests that fail in CPU static dispatch mode because they require
        # the dispatcher in order to work can be gated out with `#ifndef
        # ATEN_CPU_STATIC_DISPATCH`.
        propagated_pp_flags = [],
        # Must be linked with caffe2_core
        undefined_symbols = True,
        exported_deps = [
            ":ATen-core-sgx-headers",
            "//caffe2:jit-core-sgx",
        ],
    )
    cpp_library(
        name = "ATen-sgx-cpu",
        srcs = ATEN_SRCS_CPU_MKL + [":gen_aten=" + x for x in GENERATED_CPP],
        headers = ATEN_HEADERS_CPU_MKL,
        arch_compiler_flags = {"x86_64": x64_compiler_flags},
        compiler_flags = default_compiler_flags,
        compiler_specific_flags = compiler_specific_flags,
        include_directories = [
            "src",
            "src/TH",
        ],
        link_whole = True,
        propagated_pp_flags = cpu_preprocessor_flags,
        exported_deps = [
            "fbsource//third-party/cpuinfo_sgx:cpuinfo_coffeelake",
            ":ATen-sgx-core",
            ":aten-headers-cpu",
            ":generated-aten-headers-cpu",
            ":generated-sgx-config-header",
            ":generated-sgx-th-general-header",
            ":generated-sgx-th-general-header-no-prefix",
            "//caffe2/caffe2:caffe2_sgx_core",
            "//caffe2/caffe2/perfkernels:sgx_perfkernels",
            "//xplat/third-party/XNNPACK:XNNPACK",
        ],
        exported_external_deps = [
            ("OpenBLAS", None, "OpenBLAS"),
        ],
        deps = [
            "//caffe2/aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack",
        ],
    )
def add_sgx_aten_jit_libs():
    """Define :jit-core-sgx, the SGX build of the TorchScript JIT core sources."""
    # we do not need to define these targets if we are not in SGX mode
    if not is_sgx:
        return
    cpp_library(
        name = "jit-core-sgx",
        # Sorry, this is duped with GENERATED_CPP_CORE. I was too lazy to refactor
        # the list into a bzl file
        srcs = jit_core_sources,
        headers = jit_core_headers,
        compiler_flags = default_compiler_flags,
        compiler_specific_flags = compiler_specific_flags,
        include_directories = [""],
        link_whole = True,
        # Must be linked with caffe2_core
        undefined_symbols = True,
        exported_deps = [
            "//caffe2:ATen-core-sgx-headers",
            "//caffe2/c10:c10",
        ],
    )

View File

@ -0,0 +1,253 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("//caffe2/caffe2:defs.bzl", "get_sgx_patterns")
load("//caffe2/tools:perf_kernel_defs.bzl", "define_perf_kernels")
load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx")
def add_sgx_caffe_libs():
    """Define the SGX caffe2 libraries (:caffe2_sgx_headers, :caffe2_sgx_core).

    No-op unless the build is configured for SGX.

    Fix: core_file_patterns used to be defined twice with byte-identical
    contents; the redundant second definition was removed — the single list
    below is used both for core_srcs and for excluding those files from
    cpp_srcs.
    """
    # we do not need to define these targets if we are not in SGX mode
    if not is_sgx:
        return
    # Minimal caffe2 core pulled into the SGX runtime; listed explicitly so we
    # stay in control of exactly which files are globbed.
    core_file_patterns = [
        "core/allocator.cc",
        "core/logging.cc",
        "core/flags.cc",
        "core/common.cc",
        "core/context.cc",
        "core/event.cc",
        "core/context_base.cc",
        "core/numa.cc",
        "core/blob_serialization.cc",
        "core/tensor.cc",
        "core/types.cc",
        "core/blob_stats.cc",
        "opt/converter.cc",
        "opt/annotations.cc",
        "utils/cpuid.cc",
        "utils/threadpool/ThreadPool.cc",
        "utils/threadpool/pthreadpool-cpp.cc",
        "utils/threadpool/thread_pool_guard.cpp",
        "utils/proto_utils.cc",
    ]
    core_srcs = native.glob(
        core_file_patterns,
    )
    core_external_deps = [
        "protobuf",
        "glog",
        "sparsehash",
        "zstd",
    ]
    core_internal_deps = [
        "fbsource//third-party/fmt:fmt",
        "//caffe/proto:fb_protobuf",
        "//caffe2/caffe2/proto:fb_protobuf",
        "//caffe2/c10:c10",
        "//common/base:exception",
        "//common/logging:logging",
    ]
    # NOTE(review): internal_deps is currently unused by the targets below;
    # kept for parity with the non-SGX defs it was copied from.
    internal_deps = core_internal_deps + [
        # "//libfb/py/mkl:mkl_dep_handle_lp64",
        "//onnx/onnx:onnx_lib",
        "//foxi:foxi_loader",
        "//caffe2/caffe2/fb/onnxifi:fbonnxifi_loader_stub",
        # "//rocksdb:rocksdb",
        "//caffe2:cpuinfo",
        "//xplat/QNNPACK:QNNPACK",
        "//folly/experimental/symbolizer:symbolizer",
        "//folly/hash:hash",
        "//folly/io:iobuf",
        "//folly:conv",
        "//folly:dynamic",
        "//folly:executor",
        "//folly:format",
        "//folly:json",
        "//folly:map_util",
        "//folly:memory",
        "//folly:mpmc_queue",
        "//folly:optional",
        "//folly:random",
        "//folly:range",
        "//folly/synchronization:rw_spin_lock",
        "//folly:singleton",
        "//folly:string",
        "//folly:synchronized",
        "//folly:thread_local",
        "//folly:traits",
        "//caffe2:ATen-core-headers",
        # important dependency to claim space for future refactorings
        "//caffe2:ATen-cpu",
        "//caffe2/caffe2/perfkernels:perfkernels",
        "//xplat/third-party/FP16:FP16",
        "fbsource//third-party/neon2sse:neon2sse",
    ]
    exclude = [
        # hip files are obtained from defs_hip.bzl
        # do not include in the cpu/cuda build
        "**/hip/**/*",
        "test/caffe2_gtest_main.cc",
        "quantization/server/**/*",
        "fb/async/comm/**/*",
        "fb/monitoring/**/*",
        "fb/session/**/*",
        # utils/knobs.cc and utils/knob_patcher.cc are only used in the open-source build
        # The internal build uses versions from fb/utils/ instead.
        "utils/knobs.cc",
        "utils/knob_patcher.cc",
    ]
    test_file_patterns = get_sgx_patterns([
        "_test.cc",
        "_test.cpp",
    ])
    gpu_file_patterns = get_sgx_patterns([
        "_gpu.cc",
        "_cudnn.cc",
    ])
    cpu_file_patterns = get_sgx_patterns([
        ".cc",
        ".cpp",
    ])
    # NOTE(review): cpp_srcs is computed but not used by the targets below;
    # kept for parity with the non-SGX defs it was copied from.
    cpp_srcs = native.glob(
        cpu_file_patterns,
        exclude = exclude + gpu_file_patterns + test_file_patterns + core_file_patterns,
    )
    pp_flags = [
        "-Icaffe2",
        "-Imodules",
        "-DEIGEN_NO_DEBUG",
        "-DCAFFE2_USE_GOOGLE_GLOG",
        "-DCAFFE2_NO_CROSS_ARCH_WARNING",
        "-DCAFFE2_USE_EXCEPTION_PTR",
        # Work-around for incompatible thread pools in Caffe2 and NNPACK
        "-DFBCODE_CAFFE2",
        "-DUSE_PTHREADPOOL",
        "-DC10_MOBILE",
    ]
    compiler_flags = [
        "-Wno-unknown-pragmas",
        "-Wno-narrowing",
        "-Wno-missing-braces",
        "-Wno-strict-overflow",
        "-mno-avx",
        "-Wno-error=unused-result",
    ]
    cpu_header_patterns = [
        "**/*.h",
    ]
    cpp_headers = native.glob(
        cpu_header_patterns,
        exclude = exclude,
    )
    cpp_library(
        name = "caffe2_sgx_headers",
        headers = cpp_headers,
        propagated_pp_flags = pp_flags,
        exported_deps = core_internal_deps + [
            "//folly/io/async:async_base",
            "//caffe2/aten:ATen-core-sgx-headers",
        ],
        exported_external_deps = core_external_deps,
    )
    cpp_library(
        name = "caffe2_sgx_core",
        srcs = core_srcs + [
            "serialize/inline_container.cc",
            "serialize/crc.cc",
            "serialize/file_adapter.cc",
            "serialize/istream_adapter.cc",
            "serialize/read_adapter_interface.cc",
        ],
        compiler_flags = compiler_flags,
        link_whole = True,
        propagated_pp_flags = pp_flags,
        exported_deps = core_internal_deps + [
            "//caffe2/aten:ATen-sgx-core",
            "//caffe2/caffe2/core/nomnigraph:nomnigraph",
            "//xplat/third-party/pthreadpool:pthreadpool",
            "//caffe2:miniz",
        ],
        exported_external_deps = core_external_deps,
    )
def add_sgx_perf_kernel_libs():
    """Define the sgx_perfkernels* libraries via define_perf_kernels."""
    # we do not need to define these targets if we are not in SGX mode
    if not is_sgx:
        return
    dependencies = [
        "//caffe2/caffe2:caffe2_sgx_headers",
        "//caffe2/aten:ATen-core-sgx-headers",
    ]
    compiler_common_flags = [
        "-DCAFFE2_PERF_WITH_AVX2",
        "-DCAFFE2_PERF_WITH_AVX",
    ]
    external_deps = []
    # these are essentially disabled for the sgx build but we still need them
    # to avoid linking issues
    levels_and_flags = ([
        (
            "avx2",
            [
                "-mavx2",
                "-mfma",
                "-mavx",
                "-mf16c",
            ],
        ),
        (
            "avx",
            [
                "-mavx",
                "-mf16c",
            ],
        ),
    ])
    define_perf_kernels(
        prefix = "sgx_",
        levels_and_flags = levels_and_flags,
        compiler_common_flags = compiler_common_flags,
        dependencies = dependencies,
        external_deps = external_deps,
    )

View File

@ -0,0 +1,96 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbsource//tools/build_defs:buckconfig.bzl", "read_bool")
load(
"//caffe2:build_variables.bzl",
"core_sources_common",
"core_sources_full_mobile",
"core_trainer_sources",
"libtorch_extra_sources",
"libtorch_generated_sources",
)
# True when building for SGX enclaves (fbcode.sgx_mode buckconfig flag).
is_sgx = read_bool("fbcode", "sgx_mode", False)
def libtorch_sgx_sources(gencode_pattern = ":generate-code[{}]"):
    """Full libtorch source list for SGX builds, minus files SGX cannot compile."""
    # LLVM codegen and the CPU fuser are unavailable inside SGX enclaves.
    sgx_sources_to_exclude = [
        "torch/csrc/jit/tensorexpr/llvm_codegen.cpp",
        "torch/csrc/jit/tensorexpr/llvm_jit.cpp",
        "torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp",
    ]
    core_mobile_sources = sorted(core_sources_common + core_sources_full_mobile + core_trainer_sources)
    kept_core = [src for src in core_mobile_sources if src not in sgx_sources_to_exclude]
    kept_extra = [src for src in libtorch_extra_sources if src not in sgx_sources_to_exclude]
    return libtorch_generated_sources(gencode_pattern) + kept_core + kept_extra
def add_sgx_torch_libs():
    """Define :libtorch-sgx, the CPU-only SGX build of libtorch."""
    # we do not need to define these targets if we are not in SGX mode
    if not is_sgx:
        return
    compiler_flags_cpu = [
        "-DNO_CUDNN_DESTROY_HANDLE",
        "-DPYTORCH_ONNX_CAFFE2_BUNDLE",
        "-DTORCH_ENABLE_LLVM",
        "-Wno-write-strings",
        "-Wno-format",
        "-Wno-strict-aliasing",
        "-Wno-non-virtual-dtor",
        "-Wno-shadow-compatible-local",
        "-Wno-empty-body",
        "-DUSE_XNNPACK",
    ]
    propagated_pp_flags_cpu = [
        "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
        "-DC10_MOBILE",
    ]
    include_directories = [
        "..",
        ".",
        "torch/csrc/api/include",
        "torch/csrc",
        "torch/csrc/nn",
        "torch/lib",
    ]
    # kwargs shared by the library definition below (headers + per-compiler flags).
    common_flags = {
        "compiler_specific_flags": {
            "clang": [
                "-Wno-absolute-value",
                "-Wno-expansion-to-defined",
                "-Wno-pessimizing-move",
                "-Wno-return-type-c-linkage",
                "-Wno-unknown-pragmas",
            ],
        },
        "headers": native.glob(["torch/csrc/**/*.h", "torch/csrc/generic/*.cpp", "test/cpp/jit/*.h", "test/cpp/tensorexpr/*.h"]),
    }
    _libtorch_sgx_sources = list(libtorch_sgx_sources())
    cpp_library(
        name = "libtorch-sgx",
        srcs = _libtorch_sgx_sources + [
            "fb/supported_mobile_models/SupportedMobileModels.cpp",
            "torch/csrc/jit/mobile/function.cpp",
            "torch/csrc/jit/mobile/import.cpp",
            "torch/csrc/jit/mobile/interpreter.cpp",
            "torch/csrc/jit/mobile/module.cpp", # this is only needed to load the model from caffe2/test/cpp/lite_interpreter_runtime/delegate_test.ptl
        ],
        link_whole = True,
        include_directories = include_directories,
        propagated_pp_flags = propagated_pp_flags_cpu,
        exported_deps = [
            ":generated-autograd-headers",
            ":generated-version-header",
            "//caffe2/aten:ATen-sgx-cpu",
            "//caffe2/caffe2:caffe2_sgx_core",
            "//onnx/onnx:onnx_lib",
        ],
        exported_external_deps = [
            ("protobuf", None),
        ],
        compiler_flags = compiler_flags_cpu,
        **common_flags
    )

View File

@ -0,0 +1,568 @@
# @lint-ignore-every BUCKLINT supress the warning for using native
load("@bazel_skylib//lib:paths.bzl", "paths")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:cpp_python_extension.bzl", "cpp_python_extension")
load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule")
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
load("@fbsource//tools/build_defs:glob_defs.bzl", "glob")
load(
"//caffe2:build_variables.bzl",
"glob_libtorch_python_sources",
"libtorch_cuda_sources",
"libtorch_nvfuser_generated_headers",
"libtorch_nvfuser_runtime_sources",
"libtorch_python_cuda_sources",
"libtorch_sources",
"torch_cpp_srcs",
)
load(
"//caffe2:defs_hip.bzl",
"get_hip_flags",
"hip_external_deps",
"hip_pp_flags",
)
load("//caffe2/caffe2/fb:defs_gpu.bzl", "gpu_library_selector", "gpu_library_targets", "is_amd_build")
load("//tools/build/buck:nccl_deps.bzl", "get_nccl_dependency")
def _path_to_filename(fname):
    """Strip directory and extension: "a/b/c.cpp" -> "c"."""
    base = paths.basename(fname)
    return paths.split_extension(base)[0]
def use_kineto():
    """Kineto profiling is only enabled on x86-64 Linux hosts for non-AMD builds."""
    host = native.host_info()
    return host.os.is_linux and host.arch.is_x86_64 and not is_amd_build()
def add_torch_libs():
    """Register the fbcode Buck targets for libtorch and its bindings.

    Defines, as a side effect of calling the rule macros:
      - libtorch (CPU) and its CUDA/HIP variants,
      - NVFuser runtime-header generation rules,
      - the torch-cpp C++ API wrapper libraries (cpu/cuda/hip),
      - the _C_impl Python-binding libraries and the _C / _C_flatbuffer
        cpp_python_extension targets,
      - helper targets used by torch::deploy (torch_python_without_torch).

    Returns:
        The dict ``r`` (currently always empty; kept for interface
        compatibility with callers that expect a mapping).
    """
    r = {}
    torch_cpp_headers = glob(["torch/csrc/api/include/**/*.h"]) + ["torch/script.h"]
    libtorch_python_sources = glob_libtorch_python_sources()
    # Build-time knobs read from .buckconfig / buckconfig.local.
    use_mpi = native.read_config("fbcode", "caffe2_use_mpi", None)
    enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None))

    compiler_flags_cpu = [
        "-DUSE_C10D",
        "-DUSE_NUMPY",
        "-DUSE_SCALARS",
        "-DNO_CUDNN_DESTROY_HANDLE",
        "-DBUILD_CAFFE2",
        "-DTORCH_ENABLE_LLVM",
        "-Wno-write-strings",
        "-Wno-format",
        "-Wno-strict-aliasing",
        "-Wno-non-virtual-dtor",
        "-Wno-shadow-compatible-local",
        "-Wno-empty-body",
    ] + ([] if native.host_info().os.is_windows else [
        # XNNPACK depends on an updated version of pthreadpool interface, whose implementation
        # includes <pthread.h> - a header not available on Windows.
        "-DUSE_XNNPACK",
    ])

    # We should really include preprocessor flags here
    # instead of compiler_flags
    propagated_pp_flags_cpu = [
        "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE",
        "-DUSE_DISTRIBUTED",
        "-DUSE_C10D_GLOO",
        "-DUSE_RPC",
        "-DUSE_TENSORPIPE",
    ] + (
        ["-DUSE_C10D_MPI"] if use_mpi else []
    ) + (
        ["-DUSE_KINETO", "-DUSE_KINETO_UPDATED"] if use_kineto() else []
    ) + (
        ["-DENABLE_LIBKINETO_CLIENT"] if native.read_config("kineto", "enable_libkineto_client", "1") == "1" else []
    )

    compiler_flags_cuda = [
        "-DUSE_CUDNN",
        "-DUSE_NCCL",
    ]
    compiler_flags_hip = []
    propagated_pp_flags_cuda = [
        "-DUSE_CUDA",
        "-DUSE_C10D_NCCL",
    ]

    # Headers shared by every libtorch flavor below.
    common_headers = glob([
        "torch/csrc/**/*.h",
        # c10d used to be a separate library whose includes ended in .hpp.
        "torch/csrc/distributed/c10d/*.hpp",
        "torch/csrc/generic/*.cpp",
    ]) + [
        "torch/csrc/deploy/Exception.h",
        "torch/csrc/deploy/deploy.h",
        "torch/csrc/deploy/elf_file.h",
        "torch/csrc/deploy/environment.h",
        "torch/csrc/deploy/interpreter/builtin_registry.h",
        "torch/csrc/deploy/interpreter/interpreter_impl.h",
        "torch/csrc/deploy/loader.h",
        "torch/csrc/deploy/mem_file.h",
        "torch/csrc/deploy/noop_environment.h",
        "torch/csrc/deploy/path_environment.h",
        "torch/csrc/deploy/unity/tests/test_unity.h",
        "torch/csrc/deploy/unity/xar_environment.h",
        "torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h",
        "test/cpp/jit/test_custom_class_registrations.h",
        "test/cpp/jit/test_utils.h",
        "test/cpp/tensorexpr/gtest_assert_float_eq.h",
        "test/cpp/tensorexpr/padded_buffer.h",
        "test/cpp/tensorexpr/test_base.h",
        "test/cpp/tensorexpr/test_utils.h",
    ]
    # The flatbuffer-generated header is produced by a separate rule; drop the
    # globbed copy so the two never conflict.
    common_headers.remove("torch/csrc/jit/serialization/mobile_bytecode_generated.h")

    common_flags = {
        "compiler_specific_flags": {
            "clang": [
                "-Wno-absolute-value",
                "-Wno-expansion-to-defined",
                "-Wno-pessimizing-move",
                "-Wno-return-type-c-linkage",
                "-Wno-unknown-pragmas",
            ],
        },
        "headers": common_headers,
    }

    include_directories = [
        "..",
        ".",
        "torch/csrc/api/include",
        "torch/csrc",
        # c10d used to be a separate library and its includes were c10d/Foo.hpp,
        # hence we now need this hack to keep supporting them.
        "torch/csrc/distributed",
        "torch/csrc/nn",
    ]

    _libtorch_sources = list(libtorch_sources())

    # Add the Gloo and TensorPipe backends specific to Facebook networking.
    _libtorch_sources.append("torch/csrc/distributed/c10d/fb/GlooDeviceFactory.cpp")
    _libtorch_sources.append("torch/csrc/distributed/rpc/fb/tensorpipe_agent.cpp")

    # Core CPU libtorch. Flatbuffer serialization sources are only compiled in
    # when the caffe2_enable_flatbuffer buckconfig is set.
    cpp_library(
        name = "libtorch",
        srcs = _libtorch_sources + ([
            "torch/csrc/jit/serialization/flatbuffer_serializer.cpp",
            "torch/csrc/jit/serialization/flatbuffer_serializer_jit.cpp",
            "torch/csrc/jit/mobile/flatbuffer_loader.cpp",
        ] if enable_flatbuffer else []),
        link_whole = True,
        include_directories = include_directories,
        propagated_pp_flags = propagated_pp_flags_cpu + (["-DENABLE_FLATBUFFER"] if enable_flatbuffer else []),
        exported_deps = (
            [
                ":ATen-cpu",
                ":generated-autograd-headers",
                ":generated-lazy-headers",
                "//caffe2:version_cpp",
                "//caffe2/caffe2:caffe2_cpu",
                "//caffe2/caffe2/quantization/server:dnnlowp_ops",
                "//caffe2/caffe2/serialize:inline_container",
                "//caffe2/torch/lib/libshm:libshm",
                "//gloo:gloo",
                "//gloo/fb/transport/tls:tls",
                "//gloo/transport/tcp:tcp",
                "//tensorpipe:tensorpipe_cpu",
            ] + (["//kineto/libkineto:kineto"] if use_kineto() else []) +
            (["//caffe2:mobile_bytecode"] if enable_flatbuffer else [])
        ),
        exported_external_deps = [
            ("nanopb", None, "protobuf-nanopb"),
            ("protobuf", None),
            ("llvm-fb", None, "LLVMAnalysis"),
            ("llvm-fb", None, "LLVMBPFAsmParser"),
            ("llvm-fb", None, "LLVMBPFCodeGen"),
            ("llvm-fb", None, "LLVMCodeGen"),
            ("llvm-fb", None, "LLVMCore"),
            ("llvm-fb", None, "LLVMExecutionEngine"),
            ("llvm-fb", None, "LLVMIRReader"),
            ("llvm-fb", None, "LLVMInstCombine"),
            ("llvm-fb", None, "LLVMInterpreter"),
            ("llvm-fb", None, "LLVMMC"),
            ("llvm-fb", None, "LLVMNVPTXCodeGen"),
            ("llvm-fb", None, "LLVMOrcJIT"),
            ("llvm-fb", None, "LLVMRISCVAsmParser"),
            ("llvm-fb", None, "LLVMRISCVCodeGen"),
            ("llvm-fb", None, "LLVMScalarOpts"),
            ("llvm-fb", None, "LLVMSupport"),
            ("llvm-fb", None, "LLVMTarget"),
            ("llvm-fb", None, "LLVMTransformUtils"),
            ("llvm-fb", None, "LLVMVectorize"),
            ("llvm-fb", None, "LLVMWebAssemblyAsmParser"),
            ("llvm-fb", None, "LLVMWebAssemblyCodeGen"),
            ("llvm-fb", None, "LLVMWebAssemblyInfo"),
            ("llvm-fb", None, "LLVMX86AsmParser"),
            ("llvm-fb", None, "LLVMX86CodeGen"),
            ("llvm-fb", None, "LLVMipo"),
        ] + ([("openmpi", None, "openmpi")] if use_mpi else []),
        compiler_flags = compiler_flags_cpu,
        **common_flags
    )

    # Below rules are used to stringify NVfuser runtime library into a header files
    python_binary(
        name = "nvfuser-stringify",
        srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
        base_module = "",
        main_module = "torch.csrc.jit.codegen.cuda.tools.stringify_file",
    )

    # files in libtorch_nvfuser_runtime_sources that are violating package boundaries
    # are mapped to their corresponding export_file rules.
    violation_paths_to_rule = {
        "aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh": ":aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh",
        "aten/src/ATen/cuda/detail/UnpackRaw.cuh": ":aten/src/ATen/cuda/detail/UnpackRaw.cuh",
    }

    # One genrule per NVFuser runtime source: embed the file's contents in a
    # generated C++ header (gen-nvfuser-hdr=<stem>.h).
    for name in libtorch_nvfuser_runtime_sources:
        src_path = violation_paths_to_rule.get(name, name)
        filename = _path_to_filename(src_path)
        native.genrule(
            name = "gen-nvfuser-hdr={}.h".format(filename),
            srcs = {name: src_path},
            bash = "$(exe :nvfuser-stringify) -i $SRCDIR/{} -o $OUT".format(name),
            out = "{}.h".format(filename),
        )

    cpp_library(
        name = "generated-nvfuser-headers",
        headers = [":gen-nvfuser-hdr=" + x for x in libtorch_nvfuser_generated_headers],
        header_namespace = "nvfuser_resources",
    )

    _libtorch_cuda_sources = list(libtorch_cuda_sources)
    cpp_library(
        name = "libtorch_cuda",
        srcs = _libtorch_cuda_sources,
        link_whole = True,
        include_directories = include_directories,
        # TODO: putting USE_CUDA in propagated_pp_flags is error-prone
        propagated_pp_flags = propagated_pp_flags_cuda,
        exported_deps = [
            ":ATen",
            ":generated-aten-headers-cuda",
            ":generated-autograd-headers",
            ":generated-nvfuser-headers",
            ":libtorch",
            "//caffe2/caffe2:caffe2_cpu",
            "//caffe2/caffe2:caffe2_gpu",
            "//caffe2/torch/lib/libshm:libshm",
            "//gloo:gloo_gpu_cuda",
            "//tensorpipe:tensorpipe_cuda",
        ],
        exported_external_deps = [
            ("cudnn", None, "cudnn-lazy"),
            ("cuda", None, "nvToolsExt-lazy"),
            ("cuda", None, "nvrtc-lazy"),
            ("cuda", None, "nvrtc-builtins-lazy"),
        ] + get_nccl_dependency(),
        compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
        **common_flags
    )

    # (original_paths, hipified_paths)
    # Headers in the directories below are excluded from hipification (they
    # are shared verbatim between the CUDA and HIP builds).
    # NOTE(review): "torch/cpp/jit/" and "torch/cpp/tensorexpr/" match no
    # entries in common_headers (those test headers live under "test/cpp/...");
    # confirm these two prefixes are intended.
    libtorch_hip_headers_filter = torch_cpp_headers + [h for h in common_headers if any([h.startswith(d) for d in [
        # headers in the following directories are added to libtorch_hip_headers_filter
        # so that they are not hipified.
        "torch/csrc/deploy/",
        "torch/csrc/distributed/rpc/metrics/",
        "torch/csrc/jit/serialization/",
        "torch/cpp/jit/",
        "torch/cpp/tensorexpr/",
    ]])]
    libtorch_hip_sources = (libtorch_cuda_sources, [f.replace(".cu", ".hip") for f in libtorch_cuda_sources])
    libtorch_hip_headers = ([f for f in common_headers if f not in libtorch_hip_headers_filter],) * 2

    custom_rule(
        name = "fb_libtorch_hipify_gen",
        srcs = libtorch_hip_sources[0] + libtorch_hip_headers[0],
        build_args = "--source-dir= --hipify-dir= --copy-dir= --rewrite-cu-ext",
        build_script_dep = "//caffe2:fb_caffe2_hipify",
        output_gen_files = libtorch_hip_sources[1] + libtorch_hip_headers[1],
    )

    cpp_library(
        name = "libtorch_hip_headers",
        headers = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_headers[1]],
        header_namespace = "",
    )

    cpp_library(
        name = "libtorch_hip",
        srcs = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_sources[1]],
        headers = [f for f in common_headers if f in libtorch_hip_headers_filter],
        link_whole = True,
        propagated_pp_flags = hip_pp_flags,
        exported_deps = [
            ":generated-aten-headers-hip",
            ":generated-autograd-headers",
            ":generated-nvfuser-headers",
            ":libtorch",
            ":libtorch_hip_headers",
            "//caffe2:ATen-hip",
            "//caffe2/caffe2:caffe2_cpu",
            "//caffe2/caffe2:caffe2_gpu_hip",
            "//caffe2/torch/lib/libshm:libshm",
            "//gloo:gloo_gpu_hip",
            "//tensorpipe:tensorpipe_cpu",  # TODO: include a HIP version once it's developed
        ],
        exported_external_deps = hip_external_deps,
        compiler_flags = compiler_flags_cpu + compiler_flags_hip + [
            "-Wno-unused-result",
        ],
        hip_flags = ["-Wno-unused-result"] + get_hip_flags(),
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # Umbrella target that picks the cpu/cuda/hip libtorch per build mode.
    gpu_library_targets(
        name = "libtorch_gpu",
        deps_cpu = [
            ":libtorch",
        ],
        deps_cuda = [
            ":libtorch_cuda",
        ],
        deps_hip = [
            ":libtorch_hip",
        ],
        exclude_hip_target = False,
        extra_external_deps = [],
    )

    # torch-cpp is still conditionally compiled based on USE_CUDA. Ideally we'd
    # separate it out as an additive library instead.
    gpu_library_selector(
        name = "torch-cpp",
        deps_cpu = [":torch-cpp-cpu"],
        deps_cuda = [":torch-cpp-cuda"],
        deps_hip = [":torch-cpp-hip"],
        merge_cpu_deps = False,
        exclude_hip_target = False,
    )

    # USE_CUDA flag is propagated through propagated_pp_flags on libtorch
    cpp_library(
        name = "torch-cpp-cuda",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch_cuda",
            "//caffe2/torch/fb/init:init",
        ],
        exported_external_deps = [
            ("cuda", None, "cuda-lazy"),
            ("cudnn", None, "cudnn-lazy"),
        ],
    )

    cpp_library(
        name = "torch-cpp-hip",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch_hip",
            "//caffe2/torch/fb/init:init",
        ],
        exported_external_deps = hip_external_deps,
    )

    cpp_library(
        name = "torch-cpp-cpu",
        srcs = torch_cpp_srcs,
        headers = torch_cpp_headers,
        include_directories = [
            ".",
            "torch/csrc/api/include/",
        ],
        exported_deps = [
            ":libtorch",
            "//caffe2/torch/fb/init:init",
        ],
    )

    # _C_impl is still conditionally compiled based on USE_CUDA. Ideally we'd
    # separate it out as an additive library instead.
    # TODO: split it into cpp and cuda parts similarly to libtorch
    gpu_library_selector(
        name = "_C_impl",
        deps_cpu = [":_C_impl_cpu"],
        deps_cuda = [":_C_impl_cuda"],
        deps_hip = [":_C_impl_hip"],
        merge_cpu_deps = False,
        exclude_hip_target = False,
    )

    cpp_library(
        name = "_C_impl_cpu",
        srcs = libtorch_python_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-cpu",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu,
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # This target is used to help get headers for compile-time deps for torch::deploy
    # libinterpreter.so build _without_ getting link-time deps, which are supplied
    # separately by the application that dlopens libinterpreter.so.
    #
    # We make use of the buck auto-generated #headers flavor of a target to accomplish this.
    #
    # However, since #headers flavor of target with srcs can't be used in all build modes, we
    # work around this limitation by using this 'pass-through' target, which has a usable
    # #headers flavor in all build modes.
    cpp_library(
        name = "headers_for_torch_python_deps",
        exported_deps = [
            ":_C_impl_cpu",
        ],
    )

    cpp_library(
        name = "headers_for_torch_python_cuda_deps",
        exported_deps = [
            ":_C_impl_cuda",
        ],
    )

    # This target compiles torch_python bindings, but skips the deps on actual
    # torch and python since those will be integrated specially in the wrapper for
    # libinterpreter.so used in torch::deploy
    cpp_library(
        name = "torch_python_without_torch",
        srcs = libtorch_python_sources + torch_cpp_srcs,
        undefined_symbols = True,
        preferred_linkage = "static",
        exported_deps = [
            ":headers_for_torch_python_deps#headers",
        ],
        exported_external_deps = [
            ("pybind11", None),
            ("frozenpython", None, "python-headers"),
        ],
        compiler_flags = compiler_flags_cpu + [
            # some code in the Python bindings compiles differently
            # when you are deploy
            "-DUSE_DEPLOY",
        ],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    cpp_library(
        name = "torch_python_cuda_without_torch",
        srcs = libtorch_python_sources + torch_cpp_srcs + libtorch_python_cuda_sources,
        undefined_symbols = True,
        preferred_linkage = "static",
        exported_deps = [
            ":headers_for_torch_python_cuda_deps#headers",
        ],
        exported_external_deps = [
            ("pybind11", None),
            ("frozenpython", None, "python-headers"),
        ],
        compiler_flags = compiler_flags_cpu + [
            "-DUSE_CUDA",
            # some code in the Python bindings compiles differently
            # when you are deploy
            "-DUSE_DEPLOY",
        ],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    cpp_library(
        name = "_C_impl_cuda",
        srcs = libtorch_python_sources + libtorch_python_cuda_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-cuda",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu + compiler_flags_cuda,
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # Autogenerated files whose rules contain ":" are not hipified.
    libtorch_python_hip_sources = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" in f]
    libtorch_python_hip_sources_hipified = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if not ":" in f]

    custom_rule(
        name = "fb_C_impl_hipify_gen",
        srcs = libtorch_python_hip_sources_hipified,
        build_args = "--source-dir= --hipify-dir= --copy-dir=",
        build_script_dep = "//caffe2:fb_caffe2_hipify",
        output_gen_files = libtorch_python_hip_sources_hipified,
    )

    cpp_library(
        name = "_C_impl_hip",
        srcs = [":fb_C_impl_hipify_gen={}".format(f) for f in (libtorch_python_hip_sources_hipified)] + libtorch_python_hip_sources,
        link_whole = True,
        exported_deps = [
            "fbsource//third-party/fmt:fmt",
            ":torch-cpp-hip",
            "//caffe2/torch/fb/init:init",
            "//caffe2/torch/lib/libshm:libshm",
        ],
        exported_external_deps = [
            ("numpy", None, "cpp"),
            ("pybind11", None),
            ("python", None),
        ],
        compiler_flags = compiler_flags_cpu + compiler_flags_hip + ["-Wno-unused-result"],
        compiler_specific_flags = common_flags["compiler_specific_flags"],
    )

    # Python extension modules: torch._C and the flatbuffer-enabled stub.
    cpp_python_extension(
        name = "_C",
        srcs = [
            "torch/csrc/stub.c",
        ],
        base_module = "torch",
        deps = [
            ":_C_impl",
            "//caffe2:flatbuffer_loader",
        ],
    )

    cpp_python_extension(
        name = "_C_flatbuffer",
        srcs = [
            "torch/csrc/stub_with_flatbuffer.c",
            "torch/csrc/init_flatbuffer_module.cpp",
        ],
        base_module = "torch",
        deps = [
            ":_C_impl",
            "//caffe2:flatbuffer_loader",
            "//caffe2:flatbuffer_serializer",
        ],
    )

    return r

View File

@ -0,0 +1,117 @@
load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
# @lint-ignore-every BUCKLINT
load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
def embedded_interpreter(name, suffix, legacy = False, exported_deps = [], exported_external_deps = []):
    """Build a torch::deploy embedded-interpreter static library.

    For each supported fbcode platform this builds the interpreter shared
    object, embeds it into a `.a` payload section via ld/objcopy, and exposes
    the result behind a platform-selecting prebuilt_cxx_library plus a thin
    cpp_library wrapper that forces --export-dynamic.

    Args:
        name: base name for the final cpp_library target.
        suffix: interpreter flavor - "cpu", "cuda", or "all"
            ("all" implies cuda and skips the symbol-hiding version script).
        legacy: if True, use the older cuda-only embedding genrule.
        exported_deps: extra deps re-exported by the final wrapper.
        exported_external_deps: extra external deps on the final wrapper.
    """
    final_name = name
    is_all = suffix == "all"
    # "all" bundles the cuda bindings as well.
    is_cuda = suffix == "cuda" or is_all
    platform_static_lib = []
    for platform in ["platform009", "platform010"]:
        # Per-platform target names; `name` is rebound inside the loop.
        name = platform + "_" + final_name
        so_name = name + ".so"
        cpp_binary(
            name = so_name,
            srcs = [
                "interpreter_impl.cpp",
            ] + (["import_find_sharedfuncptr.cpp"] if is_all else []),
            headers = [
                "Optional.hpp",
                "interpreter_impl.h",
            ],
            header_namespace = "torch/csrc/deploy",
            dlopen_enabled = True,
            linker_flags = ([
                # This ensures only the intended interface symbols are public/global
                # the rest are hidden, regardless of how they were compiled
                # (e.g. fvisibility=hidden is NOT important for the component
                # objs in this library, since we override here.)
                "--version-script=$(location :hide_symbols.script)",
            ] if not is_all else []),
            deps = [
                "fbsource//third-party/fmt:fmt",
            ] + ([
                ":builtin_registry_cuda",
                "//caffe2:torch_python_cuda_without_torch",
                "//deeplearning/trt/python:frozen_tensorrt",
            ] if is_cuda else [
                ":builtin_registry",
                "//caffe2:torch_python_without_torch",
            ]),
            external_deps =
                [
                    # needed for interpreter.cpp itself, it uses pybind currently
                    ("frozenpython", None, "python-frozen"),
                    ("frozenpython", None, "python"),
                ],
            fbcode_platform = platform,
        )

        # We build torch::deploy with two embedded binaries- one with only cpu py bindings,
        # the other with cpu+cuda py bindings. This unfortunately wastes some binary size,
        # but at least at runtime only one of them is loaded.
        #
        # This is because of two reasons
        # (1) that applications such as predictor want to depend on torch::deploy in a
        # cuda-agnostic way, e.g. they don't choose yet, and a binary/app that depends
        # on predictor either chooses to include or not include a dep on cuda.
        #
        # (2) the way the embedded binary is created and loaded, it only exposes a small
        # set of interface symbols globally, for creating a new interpreter, and hides its
        # other symbols (esp. python ones) so they don't conflict with other interpreters.
        # This prevents dividing the cpu and cuda portions of bindings into _separate_ libs
        # and loading the cuda part additively. Hence to achieve requirement (1) we bundle
        # two complete interpreter libs, one with and one without cuda.
        cp_cmd = "$(location //caffe2/torch/csrc/deploy:remove_dt_needed)" if suffix == "all" else "cp"
        build_name = "build_" + name
        if not legacy:
            # Embed the .so as a read-only payload section in a static archive.
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_" + suffix + ".a",
                cmd = """\
""" + cp_cmd + """ $(location :""" + so_name + """) libtorch_deployinterpreter_internal_""" + suffix + """.so
ld -r -b binary -o ${TMP}/embedded_interpreter_""" + suffix + """.o libtorch_deployinterpreter_internal_""" + suffix + """.so
objcopy --rename-section .data=.torch_deploy_payload.interpreter_""" + suffix + """,readonly,contents -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_start -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_end ${TMP}/embedded_interpreter_""" + suffix + """.o
ar rcs ${OUT} ${TMP}/embedded_interpreter_""" + suffix + """.o
""",
            )
        else:
            # Legacy path: plain embedding without the payload-section rename.
            cxx_genrule(
                name = build_name,
                out = "embedded_interpreter_cuda_legacy.a",
                cmd = """\
cp $(location :""" + so_name + """) libtorch_deployinterpreter_cuda.so
ld -r -b binary -o ${TMP}/embedded_interpreter_cuda.o libtorch_deployinterpreter_cuda.so
ar rcs ${OUT} ${TMP}/embedded_interpreter_cuda.o
""",
            )
        platform_static_lib.append(["^" + platform, ":" + build_name])

    internal_name = final_name + "_internal"
    fb_native.prebuilt_cxx_library(
        preferred_linkage = "static",
        name = internal_name,
        visibility = ["PUBLIC"],
        link_whole = True,
        platform_static_lib = platform_static_lib,
    )

    # a thin wrapper around :embedded_interpreter_internal to add --export-dynamic
    # linker flags. The flag will be propagated to cpp_binary. We don't require
    # cpp_binary to explicitly enable --export-dynamic any more. New usecases usually
    # forgot to do so and caused interpreter not found crash.
    cpp_library(
        name = final_name,
        linker_flags = [
            "--export-dynamic",
        ],
        exported_deps = [
            ":" + internal_name,
        ] + exported_deps,
        exported_external_deps = exported_external_deps,
    )

View File

@ -0,0 +1,46 @@
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
# @lint-ignore-every BUCKLINT
load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
def build_unity(name, **kwargs):
    """Build a torch::deploy "unity" binary: a python_binary embedded as a
    payload section inside a C++ library.

    Creates the python_binary, packs it into a static archive whose .data
    section is renamed to .torch_deploy_payload.unity, and wraps that archive
    in a cpp_library ("{name}_unity_lib") linked with unity_core.

    Args:
        name: name of the python_binary target; derived targets are prefixed
            with it.
        **kwargs: forwarded verbatim to python_binary.
    """
    python_binary(name = name, **kwargs)

    # Embed the built python app into a static archive.
    cxx_genrule(
        name = "{}_build_python_app_lib".format(name),
        out = "python_app.a",
        cmd = """\
cp $(location :""" + name + """) python_app
ld -r -b binary -o ${TMP}/python_app.o python_app
# rename the .data section to .torch_deploy_payload.unity.
# don't set the alloc/load flags for the section so it will not join
# the party of relocation.
# Also strip the _binary_python_app_start/end/size symbols to avoid
# confusion.
objcopy --rename-section .data=.torch_deploy_payload.unity,readonly,contents -N _binary_python_app_start -N _binary_python_app_end -N _binary_python_app_size ${TMP}/python_app.o
ar rcs ${OUT} ${TMP}/python_app.o
""",
    )

    fb_native.prebuilt_cxx_library(
        name = "{}_python_app_lib".format(name),
        visibility = ["PUBLIC"],
        link_whole = True,
        preferred_linkage = "static",
        static_lib = ":{}_build_python_app_lib".format(name),
    )

    # --export-dynamic so the embedded interpreter can find its payload at
    # runtime; unity_core provides the loader.
    cpp_library(
        name = "{}_unity_lib".format(name),
        srcs = [
        ],
        linker_flags = [
            "--export-dynamic",
        ],
        exported_deps = [
            "//caffe2/torch/csrc/deploy/unity:unity_core",
            ":{}_python_app_lib".format(name),
        ],
    )