diff --git a/android/build_defs.bzl b/android/build_defs.bzl new file mode 100644 index 000000000000..5e8497a6923b --- /dev/null +++ b/android/build_defs.bzl @@ -0,0 +1,19 @@ +load("@fbsource//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test") +load("@fbsource//xplat/caffe2:pt_defs.bzl", "get_build_from_deps_query", "pt_operator_registry") + +DEFAULT_PT_OP_DEPS = [ + "fbsource//xplat/caffe2:torch_mobile_ops_full_dev", +] + +def pt_xplat_cxx_test(name, deps = [], pt_op_deps = DEFAULT_PT_OP_DEPS, **kwargs): + code_gen_lib = [] + if get_build_from_deps_query(): + lib_name = name + "_lib" + pt_operator_registry(lib_name, preferred_linkage = "static", template_select = False, deps = pt_op_deps) + code_gen_lib = [":" + lib_name] + deps = deps + code_gen_lib + fb_xplat_cxx_test( + name = name, + deps = deps, + **kwargs + ) diff --git a/c10/c10_defs.bzl b/c10/c10_defs.bzl new file mode 100644 index 000000000000..55fb9fc35e5d --- /dev/null +++ b/c10/c10_defs.bzl @@ -0,0 +1,29 @@ +load("@fbsource//tools/build_defs:expect.bzl", "expect") +load( + "@fbsource//tools/build_defs/apple:build_mode_defs.bzl", + "is_production_build", +) + +############################################################################### +# Check if we need to strip glog. +def _get_strip_glog_config(): + c2_strip_glog = native.read_config("caffe2", "strip_glog", "1") + expect( + c2_strip_glog in ("0", "1"), + c2_strip_glog, + ) + return bool(int(c2_strip_glog)) + +# For iOS production builds (and all Android builds), strip GLOG logging to +# save size. We can disable by setting caffe2.strip_glog=0 in .buckconfig.local. +def get_fbobjc_strip_glog_flags(): + if is_production_build() or _get_strip_glog_config(): + return ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=3"] + else: + return ["-UGOOGLE_STRIP_LOG"] + +def get_fbandroid_strip_glog_flags(): + if _get_strip_glog_config(): + return ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"] + else: + return [] diff --git a/c10/defs_hip.bzl b/c10/defs_hip.bzl new file mode 100644 index 000000000000..5084758b62e6 --- /dev/null +++ b/c10/defs_hip.bzl @@ -0,0 +1,126 @@ +load("@bazel_skylib//lib:paths.bzl", "paths") +load("//caffe2:defs_hip.bzl", "get_hip_file_path") + +gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"] +gpu_header_extensions = [".cuh", ".h", ".hpp"] + +def is_test_files(filepath): + if filepath.startswith("test"): + return True + else: + return False + +def get_c10_hip_srcs(): + gpu_file_pattern = [ + base + suffix + for base in c10_includes + for suffix in gpu_file_extensions + ] + native_gpu_files = native.glob(gpu_file_pattern) + + gpu_files = [] + hip_files = [] + for name in native_gpu_files: + # exclude the test folder + if is_test_files(name): + continue + + gpu_files.append(name) + hip_file_name = get_hip_file_path(paths.join("cuda/", name)) + hip_files.append(hip_file_name) + + # there will be some native hip files that needs suffix changed + native_hip_pattern = [ + "hip/**/*.hip", + ] + native_hip_files = native.glob(native_hip_pattern) + + gpu_files += native_hip_files + hip_files += native_hip_files + + # we run hipify script under the caffe2 folder; therefore we need to + # prepend c10 to the path so that buck can find the hipified file + real_hip_files = [] + for filename in hip_files: + real_hip_files.append(paths.join("c10", filename)) + + # return the src and output_gen files + return gpu_files, real_hip_files + +def get_c10_hip_headers(): + gpu_file_pattern = [ + base + suffix + for base in c10_includes + for suffix in gpu_header_extensions + ] + 
native_gpu_files = native.glob(gpu_file_pattern) + + # store the original + gpu_files = [] + hip_files = [] + for name in native_gpu_files: + if is_test_files(name): + continue + + gpu_files.append(name) + hip_file_name = get_hip_file_path(paths.join("cuda/", name)) + hip_files.append(hip_file_name) + + # there will be some native hip files that needs suffix changed + native_hip_pattern = [ + "hip/**/*" + suffix + for suffix in gpu_header_extensions + ] + native_hip_files = native.glob(native_hip_pattern) + + gpu_files += native_hip_files + hip_files += native_hip_files + + # we run hipify script under the caffe2 folder; therefore we need to + # prepend c10 to the path so that buck can find the hipified file + real_hip_files = [] + for filename in hip_files: + real_hip_files.append(paths.join("c10", filename)) + + # return the src and output_gen files + return gpu_files, real_hip_files + +def get_c10_hip_test_files(): + gpu_file_pattern = [ + base + suffix + for base in c10_includes + for suffix in gpu_file_extensions + ] + native_gpu_files = native.glob(gpu_file_pattern) + + # store the original + gpu_files = [] + hip_files = [] + for name in native_gpu_files: + if not is_test_files(name): + continue + + gpu_files.append(name) + hip_file_name = get_hip_file_path(paths.join("cuda/", name)) + hip_files.append(hip_file_name) + + # there will be some native hip files that needs suffix changed + native_hip_pattern = [ + "hip/test/**/*" + suffix + for suffix in gpu_header_extensions + ] + native_hip_files = native.glob(native_hip_pattern) + + gpu_files += native_hip_files + hip_files += native_hip_files + + # we run hipify script under the caffe2 folder; therefore we need to + # prepend c10 to the path so that buck can find the hipified file + real_hip_files = [] + for filename in hip_files: + real_hip_files.append(paths.join("c10", filename)) + + # return the src and output_gen files + return gpu_files, real_hip_files + +c10_includes = ["**/*"] diff --git a/c10/ovrsource_defs.bzl b/c10/ovrsource_defs.bzl new file mode 100644 index 000000000000..8d23920007a0 --- /dev/null +++ b/c10/ovrsource_defs.bzl @@ -0,0 +1,276 @@ +load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header") +load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library") + +cpu_supported_platforms = [ + "ovr_config//os:android", + "ovr_config//os:iphoneos", + "ovr_config//os:linux-x86_64", + "ovr_config//os:macos", + "ovr_config//os:windows-x86_64", + "ovr_config//runtime:arm64-linux-ubuntu-neon", +] + +cuda_supported_platforms = [ + "ovr_config//os:linux-cuda", + "ovr_config//os:windows-cuda", +] + +def define_c10_ovrsource(name, is_mobile): + if is_mobile: + pp_flags = ["-DC10_MOBILE=1"] + else: + pp_flags = [] + + oxx_static_library( + name = name, + srcs = native.glob([ + "core/*.cpp", + "core/impl/*.cpp", + "mobile/*.cpp", + "util/*.cpp", + ]), + compatible_with = cpu_supported_platforms, + compiler_flags = select({ + "DEFAULT": [], + "ovr_config//compiler:cl": [ + "/w", + ], + "ovr_config//toolchain/clang:win": [ + "-Wno-error", + "-Wno-shadow", + "-Wno-undef", + "-Wno-unused-variable", + ], + }), + include_directories = [".."], + preprocessor_flags = [ + "-DNO_EXPORT", + "-DC10_BUILD_MAIN_LIB=1", + "-DSUPPORTS_BACKTRACE=0", + ], + public_include_directories = [".."], + public_preprocessor_flags = pp_flags, + public_raw_headers = native.glob([ + "core/*.h", + "macros/*.h", + "mobile/*.h", + "test/util/*.h", # some external tests use this + "util/*.h", + ]), + raw_headers = native.glob([ + "core/impl/*.h", + 
]), + reexport_all_header_dependencies = False, + # tests = C10_CPU_TEST_TARGETS, + visibility = [ + "//xplat/caffe2/c10:c10_ovrsource", + ], + deps = select({ + "DEFAULT": [], + "ovr_config//os:linux": [ + "//third-party/numactl:numactl", + ], + }), + exported_deps = [ + ":ovrsource_c10_cmake_macros.h", + "//arvr/third-party/gflags:gflags", + "//third-party/glog:glog", + "//third-party/fmt:fmt", + ], + ) + +def define_ovrsource_targets(): + # C10_CPU_TEST_FILES = native.glob([ + # "test/core/*.cpp", + # "test/util/*.cpp", + # ]) + + # C10_GPU_TEST_FILES = native.glob([ + # "cuda/test/**/*.cpp", + # ]) + + # C10_CPU_TEST_TARGETS = [ + # ":" + paths.basename(test)[:-len(".cpp")] + "_ovrsource" + # for test in C10_CPU_TEST_FILES + # ] + + # C10_GPU_TEST_TARGETS = [ + # ":" + paths.basename(test)[:-len(".cpp")] + "_ovrsource" + # for test in C10_GPU_TEST_FILES + # ] + + common_c10_cmake_defines = [ + ("#cmakedefine C10_BUILD_SHARED_LIBS", ""), + ("#cmakedefine C10_DISABLE_NUMA", ""), + ("#cmakedefine C10_USE_NUMA", ""), + ("#cmakedefine C10_USE_MSVC_STATIC_RUNTIME", ""), + ] + + mobile_c10_cmake_defines = [ + ("#cmakedefine C10_USE_GLOG", ""), + ("#cmakedefine C10_USE_GFLAGS", ""), + ] + + non_mobile_c10_cmake_defines = [ + ("#cmakedefine C10_USE_GLOG", "#define C10_USE_GLOG 1"), + ("#cmakedefine C10_USE_GFLAGS", "#define C10_USE_GFLAGS 1"), + ] + + gen_cmake_header( + src = "macros/cmake_macros.h.in", + defines = common_c10_cmake_defines + mobile_c10_cmake_defines, + header = "c10/macros/cmake_macros.h", + prefix = "ovrsource_c10_mobile_", + ) + + gen_cmake_header( + src = "macros/cmake_macros.h.in", + defines = common_c10_cmake_defines + non_mobile_c10_cmake_defines, + header = "c10/macros/cmake_macros.h", + prefix = "ovrsource_c10_non_mobile_", + ) + + oxx_static_library( + name = "ovrsource_c10_cmake_macros.h", + compatible_with = [ + "ovr_config//os:android", + "ovr_config//os:iphoneos", + "ovr_config//os:linux", + "ovr_config//os:macos", + "ovr_config//os:windows", + ], + deps = select({ + "ovr_config//os:android": [":ovrsource_c10_mobile_cmake_macros.h"], + "ovr_config//os:iphoneos": [":ovrsource_c10_mobile_cmake_macros.h"], + "ovr_config//os:linux": [":ovrsource_c10_non_mobile_cmake_macros.h"], + "ovr_config//os:macos": [":ovrsource_c10_non_mobile_cmake_macros.h"], + "ovr_config//os:windows": [":ovrsource_c10_non_mobile_cmake_macros.h"], + }), + ) + + c10_cuda_macros = gen_cmake_header( + src = "cuda/impl/cuda_cmake_macros.h.in", + defines = [ + ("#cmakedefine C10_CUDA_BUILD_SHARED_LIBS", ""), + ], + header = "c10/cuda/impl/cuda_cmake_macros.h", + prefix = "ovrsource", + ) + + oxx_static_library( + name = "c10_ovrsource", + compatible_with = cpu_supported_platforms, + exported_deps = select({ + "DEFAULT": [":c10_full_ovrsource"], + "ovr_config//os:android": [":c10_mobile_ovrsource"], + "ovr_config//os:iphoneos": [":c10_mobile_ovrsource"], + }), + visibility = ["PUBLIC"], + ) + + """ + Most users should use c10_ovrsource, not these targets directly. 
+ """ + define_c10_ovrsource("c10_mobile_ovrsource", True) + define_c10_ovrsource("c10_full_ovrsource", False) + + oxx_static_library( + name = "c10_cuda_ovrsource", + srcs = native.glob([ + "cuda/*.cpp", + "cuda/impl/*.cpp", + ]), + compatible_with = cuda_supported_platforms, + compiler_flags = select({ + "DEFAULT": [], + "ovr_config//compiler:cl": [ + "/w", + ], + "ovr_config//toolchain/clang:win": [ + "-Wno-error", + "-Wno-shadow", + "-Wno-undef", + "-Wno-unused-variable", + ], + }), + link_whole = True, + preprocessor_flags = [ + "-DNO_EXPORT", + "-DC10_CUDA_BUILD_MAIN_LIB=1", + ], + raw_headers = native.glob([ + "cuda/*.h", + "cuda/impl/*.h", + ]), + reexport_all_header_dependencies = False, + # tests = C10_GPU_TEST_TARGETS, + visibility = ["PUBLIC"], + deps = [ + "//third-party/cuda:libcuda", + "//third-party/cuda:libcudart", + ], + exported_deps = c10_cuda_macros + [ + ":c10_ovrsource", + ], + ) + + # [ + # oxx_test( + # name = paths.basename(test)[:-len(".cpp")] + "_ovrsource", + # srcs = [test], + # compatible_with = cpu_supported_platforms, + # compiler_flags = select({ + # "DEFAULT": [], + # "ovr_config//compiler:cl": [ + # "/w", + # ], + # "ovr_config//compiler:clang": [ + # "-Wno-error", + # "-Wno-self-assign-overloaded", + # "-Wno-self-move", + # "-Wno-shadow", + # "-Wno-undef", + # "-Wno-unused-function", + # "-Wno-unused-variable", + # ], + # }), + # framework = "gtest", + # oncall = "ovrsource_pytorch", + # raw_headers = native.glob([ + # "test/**/*.h", + # ]), + # deps = [ + # ":c10_ovrsource", + # ], + # ) + # for test in C10_CPU_TEST_FILES + # ] + + # [ + # oxx_test( + # name = paths.basename(test)[:-len(".cpp")] + "_ovrsource", + # srcs = [test], + # compatible_with = cuda_supported_platforms, + # compiler_flags = select({ + # "DEFAULT": [], + # "ovr_config//compiler:cl": [ + # "/w", + # ], + # "ovr_config//compiler:clang": [ + # "-Wno-error", + # ], + # }), + # framework = "gtest", + # oncall = "ovrsource_pytorch", + # raw_headers = native.glob([ + # "test/**/*.h", + # ]), + # runtime_shared_libraries = [ + # "//third-party/cuda:cudart", + # ], + # deps = [ + # ":c10_cuda_ovrsource", + # ], + # ) + # for test in C10_GPU_TEST_FILES + # ] diff --git a/c2_defs.bzl b/c2_defs.bzl new file mode 100644 index 000000000000..01ec0c6d1642 --- /dev/null +++ b/c2_defs.bzl @@ -0,0 +1,549 @@ +load("@bazel_skylib//lib:collections.bzl", "collections") +load("@bazel_skylib//lib:paths.bzl", "paths") +load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule") +load("@fbsource//tools/build_defs:default_platform_defs.bzl", "compose_platform_setting_list") +load("@fbsource//tools/build_defs:dict_defs.bzl", "dict_defs") +load("@fbsource//tools/build_defs:expect.bzl", "expect") +load("@fbsource//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library") +load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode") +load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "CXX", "IOS", "MACOSX", "WINDOWS") +load("@fbsource//tools/build_defs/apple:build_mode_defs.bzl", "is_production_build") +load("@fbsource//tools/build_defs/apple:config_utils_defs.bzl", "STATIC_LIBRARY_IOS_CONFIG", "STATIC_LIBRARY_MAC_CONFIG", "fbobjc_configs") +load("@fbsource//tools/build_defs/apple:focus_config.bzl", "is_focus_enabled") +load("@fbsource//xplat/pfh/Msgr/Mobile/ProductInfra:DEFS.bzl", "Msgr_Mobile_ProductInfra") + +def get_c2_expose_op_to_c10(): + c2_op_to_c10 = native.read_config("caffe2", "expose_op_to_c10", "0") + + expect( + c2_op_to_c10 in ("0", "1"), + 
c2_op_to_c10, + ) + + return bool(int(c2_op_to_c10)) + +def get_c2_mpscnn(): + c2_mpscnn = native.read_config("caffe2", "enable_mpscnn", "1") + + expect( + c2_mpscnn in ("0", "1"), + c2_mpscnn, + ) + + return bool(int(c2_mpscnn)) + +def get_c2_mpscnn_test(): + c2_mpscnn_test = native.read_config("caffe2", "enable_mpscnn_test", "0") + + expect( + c2_mpscnn_test in ("0", "1"), + c2_mpscnn_test, + ) + + return bool(int(c2_mpscnn_test)) + +def get_c2_nomnigraph(): + c2_nomnigraph = native.read_config("caffe2", "enable_nomnigraph", "1") + + expect( + c2_nomnigraph in ("0", "1"), + c2_nomnigraph, + ) + + return bool(int(c2_nomnigraph)) + +def get_c2_qpl(): + c2_qpl = native.read_config("caffe2", "enable_qpl", "1") + + expect( + c2_qpl in ("0", "1"), + c2_qpl, + ) + + return bool(int(c2_qpl)) + +def get_c2_strip_debug_info(): + c2_strip_debug_info = native.read_config("caffe2", "strip_debug_info", "0") + + expect( + c2_strip_debug_info in ("0", "1"), + c2_strip_debug_info, + ) + + return bool(int(c2_strip_debug_info)) + +def get_c2_strip_glog(): + c2_strip_glog = native.read_config("caffe2", "strip_glog", "1") + + expect( + c2_strip_glog in ("0", "1"), + c2_strip_glog, + ) + + return bool(int(c2_strip_glog)) + +def get_c2_tvm(): + c2_tvm = native.read_config("caffe2", "enable_tvm", "1") + + expect( + c2_tvm in ("0", "1"), + c2_tvm, + ) + + return bool(int(c2_tvm)) + +_C2_XPLAT_NO_HPTT_PREPROCESSOR_FLAGS = [ + "-fexceptions", + "-frtti", + "-Wno-shadow", + "-Wno-unknown-pragmas", + "-Wno-unused-variable", + "-Wno-sign-compare", + "-Icaffe2", + "-Imodules", + "-DEIGEN_NO_DEBUG", + "-DCAFFE2_USE_LITE_PROTO", + "-DCAFFE2_USE_GOOGLE_GLOG", + "-DCAFFE2_RNN_NO_TEXT_FORMAT", + "-DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK=1", + "-DCAFFE2_IS_XPLAT_BUILD", + "-DSTRIP_ERROR_MESSAGES", + "-DUSE_INTERNAL_PTHREADPOOL_IMPL", +] + +def get_c2_xplat_no_hptt_preprocessor_flags(): + flags = [] + flags += _C2_XPLAT_NO_HPTT_PREPROCESSOR_FLAGS + if is_arvr_mode() and get_c2_strip_glog(): + flags += ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"] + if get_c2_expose_op_to_c10(): + flags += ["-DEXPOSE_C2_OPS", "-frtti"] + return flags + +C2_XPLAT_SERVER_PREPROCESSOR_FLAGS = [ + "-DCAFFE2_USE_EIGEN_FOR_BLAS", + "-DC10_DISABLE_SIGNAL_HANDLERS", + "-DCAFFE2_DISABLE_NUMA", +] + +C2_XPLAT_HPTT_PREPROCESSOR_FLAGS = [ + "-DCAFFE2_USE_HPTT", +] + +def get_c2_xplat_preprocessor_flags(): + flags = get_c2_xplat_no_hptt_preprocessor_flags() + C2_XPLAT_HPTT_PREPROCESSOR_FLAGS + if get_c2_nomnigraph(): + flags.append("-DCAFFE2_OPTIMIZER") + return flags + +def get_c2_xplat_no_hptt_compiler_flags(): + return [ + "-Os", + ] + get_c2_xplat_no_hptt_preprocessor_flags() + +def get_c2_xplat_compiler_flags(): + return get_c2_xplat_no_hptt_compiler_flags() + C2_XPLAT_HPTT_PREPROCESSOR_FLAGS + +def get_c2_fbobjc_xplat_compiler_flags(): + flags = [] + + if is_production_build(): + flags.append("-DCAFFE2_NO_OPERATOR_SCHEMA") + + flags.append("-DCAFFE2_NO_GRADIENT_OPS") + + # For iOS production builds (and all Android builds), strip GLOG logging to + # save size. We can disable by setting caffe2.strip_glog=0 in .buckconfig.local. 
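# Illustrative sketch, not part of the change above. The caffe2.strip_glog
# knob that get_c2_strip_glog() reads comes from Buck configuration; the
# override mentioned in the preceding comment would look roughly like this in
# .buckconfig.local (INI-style section and key; the value must be the string
# "0" or "1" that the expect() checks accept):
#
#   [caffe2]
#     strip_glog = 0
#
# A minimal sketch of how the branch that follows resolves the flag set,
# assuming only the two inputs shown here:
def _example_fbobjc_glog_flags(production_build, strip_glog):
    # Strip GLOG down to level 3 for production builds or when the knob is on.
    if production_build or strip_glog:
        return ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=3"]

    # Otherwise keep logging, but clear any inherited strip level.
    return ["-UGOOGLE_STRIP_LOG"]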
+ if is_production_build() or get_c2_strip_glog(): + flags += ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=3"] + else: + flags.append("-UGOOGLE_STRIP_LOG") + + return flags + +def get_c2_fbandroid_xplat_compiler_flags(): + flags = [ + # T95767731 -- remove this once all builds are on at least llvm-13 + "-Wno-unknown-warning-option", + "-Wno-unused-but-set-variable", + ] + + if get_c2_strip_glog(): + flags += ["-UGOOGLE_STRIP_LOG", "-DGOOGLE_STRIP_LOG=1"] + + if get_c2_strip_debug_info(): + flags.append("-g0") + + return flags + +_C2_FBOBJC_COMPILER_FLAGS = [ + "-Wno-missing-prototypes", + "-Wno-global-constructors", + "-Wno-unknown-pragmas", + "-Wno-invalid-partial-specialization", + "-Wno-missing-braces", + "-Wno-range-loop-analysis", +] + +def get_c2_fbobjc_compiler_flags(): + flags = list(_C2_FBOBJC_COMPILER_FLAGS) + + # Avoid linking Accelerate on MacOS because we have + # inconsistent LAPACK headers (see problems in D19257077). + flags.append("-DCAFFE2_USE_ACCELERATE" if not is_arvr_mode() else "-DCAFFE2_USE_EIGEN_FOR_BLAS") + if get_c2_mpscnn(): + flags.append( + # TODO(t19120552) - fix this. MPSCNNConvolutionDescriptor.strideInPixelsX + # is marked as iOS 11+, but it's been available since iOS 10. + "-Wno-unguarded-availability", + ) + return flags + +C2_FBOBJC_MACOSX_COMPILER_FLAGS = [ + "-msse4.2", +] + +C2_FBOBJC_IPHONE_COMPILER_FLAGS = [ + "-mfpu=neon-fp16", +] + +def get_c2_fbobjc_frameworks(): + frameworks = [] + if not is_arvr_mode(): + frameworks.append( + # On iOS, presumably Accelerate is a faster BLAS + "$SDKROOT/System/Library/Frameworks/Accelerate.framework", + ) + return frameworks + +def get_c2_fbobjc_ios_frameworks(): + frameworks = [] + + if get_c2_mpscnn(): + frameworks.append( + "$SDKROOT/System/Library/Frameworks/Metal.framework", + ) + + return frameworks + +def get_c2_fbobjc_linker_flags(): + flags = [] + + if get_c2_mpscnn(): + # Need linker flags as no platform_frameworks exist, and we can't + # use MPSCNN on x86_64. 
+ # We use weak_framework as it's iOS 10 + flags = [ + "-L$SDKROOT/System/Library/Frameworks/MetalPerformanceShaders.framework", + "-weak_framework", + "MetalPerformanceShaders", + ] + return flags + +def get_c2_fbobjc_exported_preprocessor_flags(): + flags = [] + + if get_c2_mpscnn(): + flags.append("-DCAFFE2_USE_MPSCNN") + + if get_c2_mpscnn_test(): + flags.append("-DCAFFE2_USE_MPSCNN_TEST") + + return flags + +def get_c2_fbandroid_exported_preprocessor_flags(): + flags = [] + + BUILD_MODE_DO_NOT_USE_WITHOUT_ASKING_SERIOUSLY = native.read_config( + "fbandroid", + "build_mode", + "dev", + ) + if BUILD_MODE_DO_NOT_USE_WITHOUT_ASKING_SERIOUSLY == "opt": + flags.append("-DCAFFE2_NO_OPERATOR_SCHEMA") + + flags.append("-DCAFFE2_NO_GRADIENT_OPS") + + return flags + +C2_FBANDROID_COMPILER_FLAGS = [ + "-DCAFFE2_USE_EIGEN_FOR_BLAS", + "-Wno-unknown-pragmas", + "-Wno-deprecated-declarations", + "-Wno-invalid-partial-specialization", + "-Wno-missing-braces", +] + +C2_FBANDROID_ARMV7_COMPILER_FLAGS = [ + "-mfpu=neon-fp16", +] + +C2_FBANDROID_X86_COMPILER_FLAGS = [ + "-mssse3", +] + +C2_FBANDROID_LINKER_FLAGS = [] + +C2_FBOBJC_EXTRA_TARGET_CONFIG = { + "MTL_LANGUAGE_REVISION": "Metal12", +} + +def get_c2_default_cxx_args(): + return dict( + header_namespace = "", + apple_sdks = (IOS, MACOSX), + compiler_flags = get_c2_xplat_compiler_flags(), + fbandroid_compiler_flags = C2_FBANDROID_COMPILER_FLAGS + get_c2_fbandroid_xplat_compiler_flags(), + fbandroid_exported_platform_preprocessor_flags = [ + ( + "android-armv7", + get_c2_fbandroid_exported_preprocessor_flags(), + ), + ], + fbandroid_linker_flags = C2_FBANDROID_LINKER_FLAGS, + fbandroid_platform_compiler_flags = [ + ("android-armv7", C2_FBANDROID_ARMV7_COMPILER_FLAGS), + (".*x86.*", C2_FBANDROID_X86_COMPILER_FLAGS), + ], + fbobjc_compiler_flags = get_c2_fbobjc_compiler_flags() + get_c2_fbobjc_xplat_compiler_flags(), + fbobjc_configs = fbobjc_configs( + STATIC_LIBRARY_IOS_CONFIG, + extra_target_config = C2_FBOBJC_EXTRA_TARGET_CONFIG, + ), + fbobjc_exported_platform_linker_flags = [ + ( + "iphoneos", + get_c2_fbobjc_linker_flags(), + ), + ], + fbobjc_exported_platform_preprocessor_flags = [ + ( + "iphoneos", + get_c2_fbobjc_exported_preprocessor_flags(), + ), + ], + fbobjc_frameworks = get_c2_fbobjc_frameworks() + get_c2_fbobjc_ios_frameworks(), + fbobjc_platform_compiler_flags = [ + ("iphoneos", C2_FBOBJC_IPHONE_COMPILER_FLAGS), + ], + macosx_compiler_flags = C2_FBOBJC_MACOSX_COMPILER_FLAGS, + fbobjc_macosx_configs_override = fbobjc_configs( + STATIC_LIBRARY_MAC_CONFIG, + ), + macosx_frameworks_override = get_c2_fbobjc_frameworks(), + preprocessor_flags = [ + # Use the internal pthreadpool impl for all Caffe2 targets on all + # platforms but do not export the preprocessor flag downstream. + "-DUSE_INTERNAL_PTHREADPOOL_IMPL", + ], + visibility = ["PUBLIC"], + windows_preferred_linkage = "static" if is_arvr_mode() else None, + xcode_public_headers_symlinks = True, + ) + +def get_c2_aten_cpu_fbobjc_macosx_deps(): + if is_focus_enabled(): + # focus2 is broken when using platform deps (T80070498) so in the case + # where it's focus2 we just add fbgemm as a standard dep. Otherwise we + # use platform deps to select correctly for arm64. 
+ return [ + "fbsource//xplat/deeplearning/fbgemm:fbgemm", + "fbsource//xplat/caffe2:cpukernel_avx2", + ] + else: + return [] + +def get_c2_aten_cpu_fbobjc_macosx_platform_deps(): + if is_focus_enabled(): + # focus2 is broken when using platform deps (T80070498) so in the case + # where it's focus2 we just add fbgemm as a standard dep. Otherwise we + # use platform deps to select correctly for arm64. + return [] + else: + return compose_platform_setting_list([ + { + "cpu": "x86_64", + "flags": [ + "fbsource//xplat/deeplearning/fbgemm:fbgemmAppleMac", + ] + ([ + "fbsource//xplat/caffe2:cpukernel_avx2AppleMac", + ] if not is_arvr_mode() else []), + "os": "macosx", + }, + { + "cpu": "arm64", + "flags": ["fbsource//xplat/third-party/XNNPACK:XNNPACKAppleMac"], + "os": "macosx", + }, + ]) + +def c2_cxx_library(**kwargs): + args = get_c2_default_cxx_args() + args.update(kwargs) + args.setdefault("platforms", (ANDROID, APPLE, CXX, WINDOWS)) + fb_xplat_cxx_library( + labels = [ + "supermodule:android/default/caffe2", + "supermodule:ios/default/public.caffe2", + ], + feature = Msgr_Mobile_ProductInfra, + **args + ) + +def c2_protobuf_rule(protos): + cpps = [] + headers = {} + raw_headers = {} + for p in protos: + proto = paths.basename(p) + if native.host_info().os.is_windows: + protocexe = "$(exe fbsource//third-party/protobuf:protoc-host)" if is_arvr_mode() else "$(location fbsource//xplat/third-party/protobuf:protoc.Windows)" + protocmd = "powershell.exe -file $(location fbsource//xplat/caffe2/scripts:proto)\\proto.ps1 -Protoc {} -Unprocessed $SRCDIR/{} -Processed $SRCDIR/{} -out $OUT -srcdir $SRCDIR".format(protocexe, p, proto) + else: + protocmd = ("cp $SRCDIR/{} $SRCDIR/{} && chmod +w $SRCDIR/{} && echo \"option optimize_for = LITE_RUNTIME;\" >> $SRCDIR/{} && ".format(p, proto, proto, proto) + + "cp $SRCDIR/caffe2/proto/caffe2.proto $SRCDIR/caffe2.proto && chmod +w $SRCDIR/caffe2.proto && echo \"option optimize_for = LITE_RUNTIME;\" >> $SRCDIR/caffe2.proto && " + + "sed -i -e 's/caffe2\\/proto\\/caffe2.proto/caffe2.proto/g' $SRCDIR/{} && ".format(proto) + + ("$(exe fbsource//third-party/protobuf:protoc-host) " if is_arvr_mode() else "$(exe fbsource//xplat/third-party/protobuf:protoc) --osx $(location fbsource//xplat/third-party/protobuf:protoc.Darwin) --linux $(location fbsource//xplat/third-party/protobuf:protoc.Linux) ") + + "-I $SRCDIR --cpp_out=$OUT $SRCDIR/{}".format(proto)) + buck_genrule( + name = proto, + srcs = sorted(collections.uniq([p, "caffe2/proto/caffe2.proto"])), + cmd_exe = protocmd, + bash = protocmd, + out = ".", + ) + (name, _) = paths.split_extension(proto) + cpp = name + ".pb.cc" + h = name + ".pb.h" + buck_genrule( + name = h, + cmd_exe = "@powershell -Command \" & { " + "(Get-Content $(location :{})\\{}".format(proto, h) + ") -replace \\\"caffe2.pb.h\\\", \\\"caffe2/proto/caffe2.pb.h\\\" | Set-Content $OUT } \"", + bash = "cp -f $(location :{})/{} $OUT && ".format(proto, h) + + "sed -i -e 's/caffe2.pb.h/caffe2\\/proto\\/caffe2.pb.h/g' $OUT", + out = h, + ) + headers["caffe2/proto/" + h] = ":{}".format(h) + raw_headers[h] = ":{}".format(h) + buck_genrule( + name = cpp, + cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(proto, cpp), + bash = "cp -f $(location :{})/{} $OUT".format(proto, cpp), + out = cpp, + ) + cpps.append(":{}".format(cpp)) + return (cpps, headers, raw_headers) + +# C2 uses lite version of protobuf while torch/jit uses some method only exists +# in full protobuf. This is a temporary workaround to enable experiment build. 
+# DO NOT USE IT IN PRODUCTION BUILD! +def c2_full_protobuf_rule(protos): + prefix = "full_" + cpps = [] + headers = {} + raw_headers = {} + for p in protos: + proto = paths.basename(p) + if native.host_info().os.is_windows: + protocexe = "$(exe fbsource//third-party/protobuf:protoc-host)" if is_arvr_mode() else "$(location fbsource//xplat/third-party/protobuf:protoc.Windows)" + protocmd = "powershell.exe -file $(location fbsource//xplat/caffe2/scripts:proto)\\proto.ps1 -Protoc {} -Unprocessed $SRCDIR/{} -Processed $SRCDIR/{} -out $OUT -srcdir $SRCDIR".format(protocexe, p, proto) + else: + protocmd = ("cp $SRCDIR/{} $SRCDIR/{} && ".format(p, proto) + + "cp $SRCDIR/caffe2/proto/caffe2.proto $SRCDIR/caffe2.proto && " + + "sed -i -e 's/caffe2\\/proto\\/caffe2.proto/caffe2.proto/g' $SRCDIR/{} && ".format(proto) + + ("$(exe fbsource//third-party/protobuf:protoc-host) " if is_arvr_mode() else "$(exe fbsource//xplat/third-party/protobuf:protoc) --osx $(location fbsource//xplat/third-party/protobuf:protoc.Darwin) --linux $(location fbsource//xplat/third-party/protobuf:protoc.Linux) ") + + "-I $SRCDIR --cpp_out=$OUT $SRCDIR/{}".format(proto)) + buck_genrule( + name = prefix + proto, + srcs = sorted(collections.uniq([p, "caffe2/proto/caffe2.proto"])), + cmd = protocmd, + out = ".", + ) + (name, _) = paths.split_extension(proto) + cpp = name + ".pb.cc" + h = name + ".pb.h" + buck_genrule( + name = prefix + h, + cmd_exe = "@powershell -Command \" & { " + "(Get-Content $(location :{})\\{}".format(prefix + proto, h) + ") -replace \\\"caffe2.pb.h\\\", \\\"caffe2/proto/caffe2.pb.h\\\" | Set-Content $OUT } \"", + bash = "cp -f $(location :{})/{} $OUT && ".format(prefix + proto, h) + + "sed -i -e 's/caffe2.pb.h/caffe2\\/proto\\/caffe2.pb.h/g' $OUT", + out = h, + ) + headers["caffe2/proto/" + h] = ":{}".format(prefix + h) + raw_headers[h] = ":{}".format(prefix + h) + buck_genrule( + name = prefix + cpp, + cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(prefix + proto, cpp), + bash = "cp -f $(location :{})/{} $OUT".format(prefix + proto, cpp), + out = cpp, + ) + cpps.append(":{}".format(prefix + cpp)) + return (cpps, headers, raw_headers) + +def libcaffe2_cxx_library(name, use_hptt, **kwargs): + c2_cxx_library( + name = name, + exported_deps = [ + "fbsource//xplat/caffe2/c10:c10", + "fbsource//third-party/protobuf:libprotobuf" if is_arvr_mode() else "fbsource//xplat/third-party/protobuf:fb-protobuf-lite", + ":caffe2_protobuf_headers", + ":pthreadpool", + ":common_core", + ":caffe2_proto_types", + ], + compiler_flags = get_c2_xplat_compiler_flags() if use_hptt else get_c2_xplat_no_hptt_compiler_flags(), + exported_preprocessor_flags = get_c2_xplat_preprocessor_flags() if use_hptt else get_c2_xplat_no_hptt_preprocessor_flags(), + cxx_preprocessor_flags = C2_XPLAT_SERVER_PREPROCESSOR_FLAGS, + fbandroid_exported_preprocessor_flags = get_c2_fbandroid_xplat_compiler_flags(), + fbobjc_exported_preprocessor_flags = get_c2_fbobjc_xplat_compiler_flags(), + # Hack to work around lack of platform_srcs support in Xcode project generation. + macosx_extra_xcode_sources_override = [], + link_whole = True, + **kwargs + ) + +def c2_operator_library(name, **kwargs): + dict_defs.key_extend( + kwargs, + "deps", + [ + "fbsource//xplat/folly:molly", + "fbsource//third-party/glog:glog", + ":caffe2", + ] + ([":aten_cpu"] if get_c2_expose_op_to_c10() else []), + ) + + # NOTE: Currently operators can "depend" on other operators, which is used + # so that loading one will implicitly load the dependencies. 
So, make sure + # that no `--as-needed` flags pulled in from dependencies cause these + # operator deps to get dropped. + linker_flags = [ + "-Wl,--no-as-needed", + ] + c2_cxx_library( + name = name, + soname = "lib" + name + ".$(ext)", + fbandroid_compiler_flags = get_c2_default_cxx_args()["fbandroid_compiler_flags"] + ["-Os"], + fbobjc_compiler_flags = get_c2_default_cxx_args()["fbobjc_compiler_flags"] + ["-Oz", "-DCOMPILING_FOR_MIN_SIZE=1"], + link_whole = True, + cxx_exported_linker_flags = linker_flags, + fbandroid_exported_linker_flags = linker_flags, + exported_deps = [ + ":caffe2", + ], + **kwargs + ) + +def c2_genrule(genrule, genfiles, prefix = "", src_path = "", header_namespace = ""): + headers = {} + srcs = [] + for generated_filename in genfiles: + buck_genrule( + name = prefix + generated_filename, + bash = "cp -f $(location :{})/{} $OUT".format(genrule, src_path + generated_filename), + cmd_exe = "@powershell -Command copy $(location :{})/{} $OUT".format(genrule, src_path + generated_filename), + out = generated_filename, + ) + rule = ":{}{}".format(prefix, generated_filename) + headers[header_namespace + generated_filename] = rule + srcs.append(rule) + return {"headers": headers, "srcs": srcs} diff --git a/c2_test_defs.bzl b/c2_test_defs.bzl new file mode 100644 index 000000000000..8ef83073d6fa --- /dev/null +++ b/c2_test_defs.bzl @@ -0,0 +1,20 @@ +load("@fbsource//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test") +load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "CXX", "IOS", "MACOSX") +load("@fbsource//xplat/caffe2:c2_defs.bzl", "get_c2_default_cxx_args") + +def c2_cxx_test(**kwargs): + args = get_c2_default_cxx_args() + args.update(kwargs) + args["fbandroid_use_instrumentation_test"] = True + for flag in [ + "macosx_compiler_flags", + "fbobjc_macosx_configs_override", + "macosx_frameworks_override", + "xcode_public_headers_symlinks", + "macosx_inherited_buck_flags_override", + ]: + args.pop(flag, None) + args["apple_sdks"] = (IOS, MACOSX) + args["platforms"] = (CXX, APPLE, ANDROID) + args["contacts"] = ["oncall+ai_infra_mobile_platform@xmail.facebook.com"] + fb_xplat_cxx_test(**args) diff --git a/caffe2/BUILD_MODE.bzl b/caffe2/BUILD_MODE.bzl new file mode 100644 index 000000000000..1fbd3e6f7a47 --- /dev/null +++ b/caffe2/BUILD_MODE.bzl @@ -0,0 +1,23 @@ +""" build mode definitions for caffe2/caffe2 """ + +load("@fbcode//:BUILD_MODE.bzl", get_parent_modes = "all_modes_keep_gpu_sections_all_modes_use_lld") +load("@fbcode_macros//build_defs:create_build_mode.bzl", "extend_build_mode") + +def update_mode_struct(name, mode_struct): + if name == "dev": + return extend_build_mode( + mode_struct, + # TODO(ipbrady): Modules introduce floating point inaccuracies (T43879333) + cxx_modules = False, + ) + else: + return mode_struct + +_modes = { + mode_name: update_mode_struct(mode_name, mode_struct) + for mode_name, mode_struct in get_parent_modes().items() +} + +def get_modes(): + """ Return modes for this file """ + return _modes diff --git a/caffe2/defs.bzl b/caffe2/defs.bzl new file mode 100644 index 000000000000..39f4b1b5d93d --- /dev/null +++ b/caffe2/defs.bzl @@ -0,0 +1,89 @@ +# useful command for debugging which files are included: +# buck targets caffe2/caffe2: --json | jq -r "map(select(.srcs)) | map({key: .name, value: .srcs | sort}) | from_entries" +load("@fbsource//tools/build_defs:type_defs.bzl", "is_list") +load("//tools/build/buck:flags.bzl", "get_flags") + +flags = get_flags() + +_BASE_PATHS = ( + "core/*", + "core/boxing/*", + 
"core/boxing/impl/*", + "core/dispatch/*", + "core/op_registration/*", + "cuda_rtc/*", + "db/*", + "experiments/operators/*", + "ideep/**/*", + "observers/*", + "onnx/**/*", + "operators/**/*", + "observers/*", + "predictor/*", + "queue/*", + "sgd/*", + "share/contrib/zstd/*", + "transforms/*", + "utils/**/*", +) + +_BASE_SGX_PATHS = ( + "core/*", + "core/boxing/*", + "core/boxing/impl/*", + "core/dispatch/*", + "core/op_registration/*", + "cuda_rtc/*", + "db/*", + "experiments/operators/*", + "observers/*", + "onnx/**/*", + "operators/**/*", + "observers/*", + "predictor/*", + "queue/*", + "sgd/*", + "serialize/*", + "share/contrib/zstd/*", + "transforms/*", + "utils/**/*", +) + +def get_sgx_patterns(ext): + if not is_list(ext): + ext = [ext] + return [path + e for path in _BASE_SGX_PATHS for e in ext] + +def get_patterns(ext): + if not is_list(ext): + ext = [ext] + return [path + e for path in _BASE_PATHS for e in ext] + +def get_simd_preprocessor_flags(): + return [ + "-DUSE_FBGEMM", + ] + +def get_simd_compiler_flags(): + if flags.USE_SSE_ONLY: + return ["-mno-avx"] + + simd_compiler_flags = [ + "-mavx", + ] + get_simd_preprocessor_flags() + + # Every uarch with AVX512 support has AVX2 support + if (flags.USE_AVX2 or flags.USE_AVX512): + simd_compiler_flags += [ + "-mavx2", + "-mfma", + ] + + if flags.USE_AVX512: + simd_compiler_flags += [ + "-mavx512f", + "-mavx512dq", + "-mavx512vl", + ] + + return simd_compiler_flags diff --git a/caffe2/defs_hip.bzl b/caffe2/defs_hip.bzl new file mode 100644 index 000000000000..a93fe3569060 --- /dev/null +++ b/caffe2/defs_hip.bzl @@ -0,0 +1,149 @@ +load("@bazel_skylib//lib:paths.bzl", "paths") +load( + "//caffe2:defs_hip.bzl", + "caffe2_includes", + "caffe2_video_image_includes", + "get_hip_file_path", +) + +gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"] +gpu_header_extensions = [".cuh", ".h", ".hpp"] + +def is_caffe2_gpu_file(filepath): + # those files are needed since they define placeholders + if "/native/cudnn/" in filepath: + return True + + # files that are already compatible with hip + if "/hip/" in filepath: + return False + + # exclude all cudnn and nvrtc implementations except for nvrtc_stub + if "/nvrtc_stub/" in filepath: + return True + if any([keyword in filepath for keyword in ("cudnn", "nvrtc", "NVRTC")]): + return False + + if "/cuda/" in filepath: + return True + + filename = paths.basename(filepath) + _, ext = paths.split_extension(filename) + + if "gpu" in filename or ext in [".cu", ".cuh"]: + return True + + return False + +def get_caffe2_hip_srcs( + include_patterns = caffe2_includes, + include_files = [], + project_dir = "caffe2"): + gpu_file_pattern = [ + base + suffix + for base in include_patterns + for suffix in gpu_file_extensions + ] + native_gpu_files = native.glob(gpu_file_pattern) + include_files + + # store the original + gpu_files = [] + hip_files = [] + for name in native_gpu_files: + # exclude test files + if "_test" in paths.basename(name) or not is_caffe2_gpu_file(name): + continue + + gpu_files.append(name) + hip_file_name = get_hip_file_path(name, is_caffe2 = True) + hip_files.append(hip_file_name) + + # there will be some native hip files that needs suffix changed + native_hip_pattern = [ + base[:-1] + "hip/*.hip" + for base in include_patterns + ] + native_hip_files = native.glob(native_hip_pattern) + + gpu_files += native_hip_files + hip_files += native_hip_files + + # we run hipify script under the caffe2 folder; therefore we need to + # prepend caffe2 to the path so that buck can find the hipified 
file + real_hip_files = [] + for filename in hip_files: + real_hip_files.append(paths.join(project_dir, filename)) + + # return the src and output_gen files + return gpu_files, real_hip_files + +def get_caffe2_hip_headers( + include_patterns = caffe2_includes, + include_files = [], + project_dir = "caffe2"): + header_pattern = [ + base + suffix + for base in include_patterns + for suffix in gpu_header_extensions + ] + native_header_files = native.glob(header_pattern) + include_files + + header_files = [] + hip_headers = [] + for name in native_header_files: + # exclude test files + # if the caller directly specifies files via include_files, follow it + if not name in include_files and ("_test" in paths.basename(name) or not is_caffe2_gpu_file(name)): + continue + + header_files.append(name) + hip_header_name = get_hip_file_path(name, is_caffe2 = True) + hip_headers.append(hip_header_name) + + # we run hipify script under the caffe2 folder; therefore we need to + # prepend caffe2 to the path so that buck can find the hipified file + real_hip_headers = [] + for filename in hip_headers: + real_hip_headers.append(paths.join(project_dir, filename)) + + # return the src and output_gen files + return header_files, real_hip_headers + +def get_caffe2_hip_video_image_srcs(): + return get_caffe2_hip_srcs(include_patterns = caffe2_video_image_includes) + +def get_caffe2_hip_video_image_headers(): + return get_caffe2_hip_headers(include_patterns = caffe2_video_image_includes) + +def get_caffe2_hip_test_files(): + test_includes = [ + "**/*_gpu_test.cc", + ] + + # let's ignores the mpi test and fb-internal tests for now + test_ignores = [ + "mpi/mpi_gpu_test.cc", + # "operators/roi_align_op_gpu_test.cc", + "**/fb/**/*_gpu_test.cc", + ] + + native_test_files = native.glob(test_includes, exclude = test_ignores) + + test_files = [] + hip_test_files = [] + for name in native_test_files: + if not is_caffe2_gpu_file(name): + continue + + test_files.append(name) + hip_file_name = get_hip_file_path(name, is_caffe2 = True) + hip_test_files.append(hip_file_name) + + # we run hipify script under the caffe2 folder; therefore we need to + # prepend caffe2 to the path so that buck can find the hipified file + real_hip_test_files = [] + for filename in hip_test_files: + real_hip_test_files.append(paths.join("caffe2", filename)) + + # return the src and output_gen files + return test_files, real_hip_test_files diff --git a/defs.bzl b/defs.bzl new file mode 100644 index 000000000000..c81f59274c1c --- /dev/null +++ b/defs.bzl @@ -0,0 +1,89 @@ +def get_sleef_deps(): + return [("sleef", None, "sleef")] if not (host_info().arch.is_aarch64) else [] + +def get_blas_gomp_deps(): + if host_info().arch.is_x86_64: + return [( + "IntelComposerXE", + None, + native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp"), + )] + if host_info().arch.is_aarch64: + return [ + ("OpenBLAS", None, "OpenBLAS"), + ("openmp", None, "omp"), + ] + fail("Unsupported architecture") + +default_compiler_flags = [ + "-Wall", + "-Wextra", + "-Wno-unused-function", + "-Wno-unused-parameter", + "-Wno-error=strict-aliasing", + "-Wno-unused-local-typedefs", + "-Wno-shadow-compatible-local", + "-Wno-maybe-uninitialized", # aten is built with gcc as part of HHVM + "-Wno-unknown-pragmas", + "-Wno-strict-overflow", + # See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/ + # These trigger on platform007 + "-Wno-stringop-overflow", + "-Wno-class-memaccess", + "-DHAVE_MMAP", + "-DUSE_GCC_ATOMICS=1", + "-D_FILE_OFFSET_BITS=64", + "-DHAVE_SHM_OPEN=1", 
+ "-DHAVE_SHM_UNLINK=1", + "-DHAVE_MALLOC_USABLE_SIZE=1", + "-DTH_HAVE_THREAD", + "-DCPU_CAPABILITY_DEFAULT", + "-DTH_INDEX_BASE=0", + "-DMAGMA_V2", + "-DNO_CUDNN_DESTROY_HANDLE", + "-DUSE_FBGEMM", + "-DUSE_QNNPACK", + "-DUSE_PYTORCH_QNNPACK", + # The dynamically loaded NVRTC trick doesn't work in fbcode, + # and it's not necessary anyway, because we have a stub + # nvrtc library which we load canonically anyway + "-DUSE_DIRECT_NVRTC", + "-DUSE_RUY_QMATMUL", +] + ([] if native.host_info().os.is_windows else [ + # XNNPACK depends on an updated version of pthreadpool interface, whose implementation + # includes - a header not available on Windows. + "-DUSE_XNNPACK", +]) + (["-O1"] if native.read_config("fbcode", "build_mode_test_label", "") == "dev-nosan" else []) + +compiler_specific_flags = { + "clang": [ + "-Wno-absolute-value", + "-Wno-pass-failed", + "-Wno-braced-scalar-init", + ], + "gcc": [ + "-Wno-error=array-bounds", + ], +} + +def get_cpu_parallel_backend_flags(): + parallel_backend = native.read_config("pytorch", "parallel_backend", "openmp") + defs = [] + if parallel_backend == "openmp": + defs.append("-DAT_PARALLEL_OPENMP_FBCODE=1") + elif parallel_backend == "tbb": + defs.append("-DAT_PARALLEL_NATIVE_TBB_FBCODE=1") + elif parallel_backend == "native": + defs.append("-DAT_PARALLEL_NATIVE_FBCODE=1") + else: + fail("Unsupported parallel backend: " + parallel_backend) + if native.read_config("pytorch", "exp_single_thread_pool", "0") == "1": + defs.append("-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1") + mkl_ver = native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp") + if mkl_ver == "mkl_lp64_seq": + defs.append("-DATEN_MKL_SEQUENTIAL_FBCODE=1") + return defs + +def is_cpu_static_dispatch_build(): + mode = native.read_config("fbcode", "caffe2_static_dispatch_mode", "none") + return mode == "cpu" diff --git a/defs_gpu.bzl b/defs_gpu.bzl new file mode 100644 index 000000000000..3d6cae883089 --- /dev/null +++ b/defs_gpu.bzl @@ -0,0 +1,166 @@ +load("@fbcode_macros//build_defs:native_rules.bzl", "buck_genrule") +load( + "//caffe2/caffe2:defs_hip.bzl", + "get_caffe2_hip_headers", + "get_caffe2_hip_srcs", +) +load(":ufunc_defs.bzl", "aten_ufunc_names") + +ATEN_CUDA_H_PATTERN = [ + "aten/src/ATen/cuda/*.h", + "aten/src/ATen/cuda/detail/*.h", + "aten/src/ATen/cuda/nvrtc_stub/*.h", + "aten/src/ATen/cuda/*.cuh", + "aten/src/ATen/cuda/detail/*.cuh", +] + +ATEN_CUDA_CPP_PATTERN = [ + "aten/src/ATen/cuda/*.cpp", + "aten/src/ATen/cuda/detail/*.cpp", + "aten/src/ATen/cuda/nvrtc_stub/*.cpp", +] + +ATEN_CUDA_CU_PATTERN = [ + "aten/src/ATen/cuda/*.cu", + "aten/src/ATen/cuda/detail/*.cu", +] + +ATEN_CUDNN_H_PATTERN = [ + "aten/src/ATen/cudnn/*.h", + "aten/src/ATen/cudnn/*.cuh", +] + +ATEN_CUDNN_CPP_PATTERN = ["aten/src/ATen/cudnn/*.cpp"] + +ATEN_MIOPEN_H_PATTERN = [ + "aten/src/ATen/miopen/*.h", + "aten/src/ATen/miopen/*.cuh", +] + +ATEN_MIOPEN_CPP_PATTERN = ["aten/src/ATen/miopen/*.cpp"] + +ATEN_NATIVE_CUDNN_CPP_PATTERN = ["aten/src/ATen/native/cudnn/*.cpp"] + +ATEN_NATIVE_MIOPEN_CPP_PATTERN = ["aten/src/ATen/native/miopen/*.cpp"] + +ATEN_NATIVE_CUDA_CU_PATTERN = [ + "aten/src/ATen/native/cuda/*.cu", + "aten/src/ATen/native/nested/cuda/*.cu", + "aten/src/ATen/native/quantized/cuda/*.cu", + "aten/src/ATen/native/sparse/cuda/*.cu", + "aten/src/ATen/native/transformers/**/*.cu", +] + +ATEN_NATIVE_CUDA_CPP_PATTERN = [ + "aten/src/ATen/native/cuda/*.cpp", + "aten/src/ATen/native/cuda/linalg/*.cpp", + "aten/src/ATen/native/nested/cuda/*.cpp", + "aten/src/ATen/native/sparse/cuda/*.cpp", + 
"aten/src/ATen/native/transformers/cuda/*.cpp", +] + +ATEN_NATIVE_CUDA_H_PATTERN = [ + "aten/src/ATen/native/cudnn/**/*.h", + "aten/src/ATen/native/cuda/**/*.h", + "aten/src/ATen/native/cuda/**/*.cuh", + "aten/src/ATen/native/sparse/cuda/*.h", + "aten/src/ATen/native/sparse/cuda/*.cuh", + "aten/src/ATen/native/quantized/cuda/*.h", + "aten/src/ATen/native/transformers/cuda/*.h", + "aten/src/ATen/native/transformers/**/*.cuh", +] + +# T66678203: Clang CUDA rollout +ATEN_CUDA_CLANG_CU_PATTERN = [ + "aten/src/ATen/native/cuda/DistributionBernoulli.cu", +] + +### Cuda Files +def get_aten_cuda_headers(): + ATEN_CUDA_H = native.glob(ATEN_CUDA_H_PATTERN) + ATEN_NATIVE_CUDA_H = native.glob(ATEN_NATIVE_CUDA_H_PATTERN) + ATEN_CUDNN_H = native.glob(ATEN_CUDNN_H_PATTERN) + return ATEN_CUDA_H + ATEN_NATIVE_CUDA_H + ATEN_CUDNN_H + +def get_aten_cuda_srcs(): + ATEN_CUDA_CU = native.glob(ATEN_CUDA_CU_PATTERN) + ATEN_NATIVE_CUDA_CU = native.glob( + ATEN_NATIVE_CUDA_CU_PATTERN, + exclude = ATEN_CUDA_CLANG_CU_PATTERN, + ) + return ATEN_CUDA_CU + ATEN_NATIVE_CUDA_CU + +def get_aten_cuda_clang_srcs(): + return native.glob(ATEN_CUDA_CLANG_CU_PATTERN) + +# CPU+CUDA file +# Note that these sources and headers include the CPU lists too +def get_all_cuda_srcs(): + ATEN_NATIVE_CUDNN_CPP = native.glob(ATEN_NATIVE_CUDNN_CPP_PATTERN) + ATEN_CUDNN_CPP = native.glob(ATEN_CUDNN_CPP_PATTERN) + ATEN_NATIVE_MIOPEN_CPP = native.glob(ATEN_NATIVE_MIOPEN_CPP_PATTERN) + ATEN_CUDA_CPP = native.glob(ATEN_CUDA_CPP_PATTERN) + ATEN_NATIVE_CUDA_CPP = native.glob(ATEN_NATIVE_CUDA_CPP_PATTERN) + + return ATEN_NATIVE_CUDNN_CPP + ATEN_CUDNN_CPP + ATEN_NATIVE_MIOPEN_CPP + ATEN_CUDA_CPP + ATEN_NATIVE_CUDA_CPP + get_aten_cuda_srcs() + +### HIP files +# Files that must be hipified +def get_aten_hip_srcs(): + ## CU -> HIP files + ATEN_CUDA_CU = native.glob(ATEN_CUDA_CU_PATTERN) + + # HIP does not use clang for ATEN_CUDA_CLANG_CU_PATTERN + ATEN_NATIVE_CUDA_CU = native.glob(ATEN_NATIVE_CUDA_CU_PATTERN) + + ## CPU files + ATEN_NATIVE_CUDNN_CPP = native.glob(ATEN_NATIVE_CUDNN_CPP_PATTERN) + ATEN_CUDNN_CPP = native.glob(ATEN_CUDNN_CPP_PATTERN) + ATEN_CUDA_CPP = native.glob(ATEN_CUDA_CPP_PATTERN) + ATEN_NATIVE_CUDA_CPP = native.glob(ATEN_NATIVE_CUDA_CPP_PATTERN) + + # Get hipified file names (before, after) + srcs = ATEN_CUDA_CU + ATEN_NATIVE_CUDA_CU + ATEN_NATIVE_CUDNN_CPP + ATEN_CUDNN_CPP + ATEN_CUDA_CPP + ATEN_NATIVE_CUDA_CPP + ret = get_caffe2_hip_srcs(include_patterns = [], include_files = srcs, project_dir = "") + return (ret[0], [f.replace("aten/src/", "") for f in ret[1]]) + +def get_aten_hip_headers(): + ATEN_CUDA_H = native.glob(ATEN_CUDA_H_PATTERN) + ATEN_NATIVE_CUDA_H = native.glob(ATEN_NATIVE_CUDA_H_PATTERN) + ATEN_CUDNN_H = [] # native.glob(ATEN_CUDNN_H_PATTERN) + + # Get hipified file names (before, after) + srcs = ATEN_CUDA_H + ATEN_NATIVE_CUDA_H + ATEN_CUDNN_H + ret = get_caffe2_hip_headers(include_patterns = [], include_files = ATEN_CUDA_H + ATEN_NATIVE_CUDA_H + ATEN_CUDNN_H, project_dir = "") + return ret[0], [f.replace("aten/src/", "") for f in ret[1]] + +# Native HIP-aware files +def get_aten_hip_native_srcs(): + HIP_IMPL_CPP = native.glob(["aten/src/ATen/hip/impl/*.cpp"]) + ATEN_MIOPEN_CPP = native.glob(ATEN_MIOPEN_CPP_PATTERN) + ATEN_NATIVE_MIOPEN_CPP = native.glob(ATEN_NATIVE_MIOPEN_CPP_PATTERN) + return HIP_IMPL_CPP + ATEN_MIOPEN_CPP + ATEN_NATIVE_MIOPEN_CPP + +def get_aten_hip_native_headers(): + HIP_IMPL_H = native.glob(["aten/src/ATen/hip/impl/*.h"]) + ATEN_MIOPEN_H = native.glob(ATEN_MIOPEN_H_PATTERN) + return HIP_IMPL_H + 
ATEN_MIOPEN_H + +def get_aten_hip_ufunc_generated_cuda_sources(gencode_pattern = "{}"): + # Contents of these CUDA files do not need to be hipified at this point, + # but they must be renamed from ".cu" to ".hip" because, unlike OSS, a compiler + # is selected based on a file extension. + + renamed_rules = [] + for n in aten_ufunc_names: + cuda_name = "UfuncCUDA_{}.cu".format(n) + hip_name = "UfuncCUDA_{}.hip".format(n) + buck_genrule( + name = "aten_ufunc_hip_renamed_{}".format(n), + srcs = [gencode_pattern.format(cuda_name)], + bash = 'cp "$SRCDIR/{}" "$OUT"'.format(cuda_name), + out = hip_name, + default_outs = [], + ) + renamed_rules.append(":aten_ufunc_hip_renamed_{}".format(n)) + return renamed_rules diff --git a/defs_hip.bzl b/defs_hip.bzl new file mode 100644 index 000000000000..061f7fe2157f --- /dev/null +++ b/defs_hip.bzl @@ -0,0 +1,136 @@ +load("@bazel_skylib//lib:paths.bzl", "paths") +load("@fbcode//tools/build/buck:rocm_flags.bzl", "get_rocm_arch_args") + +caffe2_includes = [ + "operators/**/*", + "operators/*", + "sgd/*", + "transforms/*", + # distributed folder is managed by its own TARGETS file + # "distributed/*", + "queue/*", + # "binaries/*", + "**/*_test*", + "core/*", + "db/*", + "utils/**/*", +] + +caffe2_video_image_includes = [ + "image/*", + "video/*", +] + +pytorch_includes = [ + "aten/src/ATen/cuda/*", + "aten/src/ATen/native/cuda/*", + "aten/src/ATen/native/cuda/linalg/*", + "aten/src/ATen/native/cudnn/*", + "aten/src/ATen/native/nested/cuda/*", + "aten/src/ATen/native/sparse/cuda/*", + "aten/src/ATen/native/transformers/cuda/*", + "aten/src/THC/*", + "aten/src/ATen/test/*", + "torch/*", +] + +gpu_file_extensions = [".cu", ".c", ".cc", ".cpp"] +gpu_header_extensions = [".cuh", ".h", ".hpp"] + +hip_external_deps = [ + ("rocm", None, "amdhip64-lazy"), + ("rocm", None, "MIOpen-lazy"), + ("rocm", None, "rccl-lazy"), + ("rocm", None, "roctracer64-lazy"), +] + +hip_pp_flags = [ + # HIP 4.4.21432 -> TORCH_HIP_VERSION=404 + "-DTORCH_HIP_VERSION=(FB_HIP_VERSION/100000)", + # ROCm 4.5.2 -> ROCM_VERSION=40502 + "-DROCM_VERSION=FB_ROCM_VERSION", + "-DUSE_ROCM=1", + "-D__HIP_PLATFORM_HCC__=1", + "-D__HIP_NO_HALF_OPERATORS__=1", + "-D__HIP_NO_HALF_CONVERSIONS__=1", + "-DCUDA_HAS_FP16=1", + "-DCAFFE2_USE_MIOPEN", + # The c10/cuda/impl/cuda_cmake_macros.h is not generated for the + # hip build yet. + "-DC10_HIP_NO_CMAKE_CONFIGURE_FILE", + # clang with -fopenmp=libgomp (gcc's OpenMP runtime library) produces + # single threaded code and doesn't define -D_OPENMP by default. + # clang with -fopenmp or -fopenmp=libomp (llvm's OpenMP runtime library) + # produces multi-threaded code and defines -D_OPENMP by default. + # + # hcc currently don't have llvm openmp runtime project builtin. + # wrap_hip.py also drops -D_OPENMP if explicitly specified. + "-U_OPENMP", +] + +def get_hip_flags(): + return [ + # Caffe2 cannot be compiled with NDEBUG using ROCm 4.5.2. + # TODO: The issue should be fixed properly. 
+ "-UNDEBUG", + "-Wno-error=absolute-value", + "-Wno-macro-redefined", + "-Wno-inconsistent-missing-override", + "-Wno-exceptions", + "-Wno-shift-count-negative", + "-Wno-shift-count-overflow", + "-Wno-duplicate-decl-specifier", + "-Wno-implicit-int-float-conversion", + "-Wno-unused-result", + "-Wno-pass-failed", + "-Wno-unknown-pragmas", + "-Wno-cuda-compat", + ] + get_rocm_arch_args() + +def get_hip_file_path(filepath, is_caffe2 = False): + """ + this function should be in sync with the hipified script in + third-party/hipify_torch/hipify/hipify_python.py + unfortunately because it's a normal python (instead of Starlark) + we cannot simply import from there + + The general rule of converting file names from cuda to hip is: + - If there is a directory component named "cuda", replace + it with "hip", AND + + - If the file name contains "CUDA", replace it with "HIP", AND + + If NONE of the above occurred, then insert "hip" in the file path + as the direct parent folder of the file + + Furthermore, ALWAYS replace '.cu' with '.hip', because those files + contain CUDA kernels that needs to be hipified and processed with + hcc compile + """ + dirpath = paths.dirname(filepath) + filename = paths.basename(filepath) + filename, ext = paths.split_extension(filename) + + if ext == ".cu": + ext = ".hip" + + orig_dirpath = dirpath + + dirpath = dirpath.replace("cuda", "hip") + dirpath = dirpath.replace("THC", "THH") + + filename = filename.replace("cuda", "hip") + filename = filename.replace("CUDA", "HIP") + + # Special case to handle caffe2/core/THCCachingAllocator + if not (is_caffe2 and dirpath == "core"): + filename = filename.replace("THC", "THH") + + # if the path doesn't change (e.g., path doesn't include "cuda" so we + # cannot differentiate), insert "hip" as the direct parent folder + # special case for utils/cub_namespace, because it is first used and hipified when used + # from core, it doesn't end up in hip directory + if dirpath == orig_dirpath and not filename == "cub_namespace": + dirpath = paths.join(dirpath, "hip") + + return paths.join(dirpath, filename + ext) diff --git a/ios/METADATA.bzl b/ios/METADATA.bzl new file mode 100644 index 000000000000..467644b22773 --- /dev/null +++ b/ios/METADATA.bzl @@ -0,0 +1,10 @@ +# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN +# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL +# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF +# THIS PACKAGE. +# TPMS-GENERATED: b832a8f526016b30c557d8a58fc89d9338a51cff +METADATA = { + "name": "LibTorch", + "owner": "ai_infra_mobile_platform", + "version": "1.11.0", +} diff --git a/ios/TestApp/METADATA.bzl b/ios/TestApp/METADATA.bzl new file mode 100644 index 000000000000..6ab0710d6660 --- /dev/null +++ b/ios/TestApp/METADATA.bzl @@ -0,0 +1,10 @@ +# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN +# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL +# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF +# THIS PACKAGE. 
+# TPMS-GENERATED: ba55575493b7ad21fde900f05f93c501b2715a09 +METADATA = { + "name": "unf_ext", + "owner": "ai_infra_mobile_platform", + "version": "0.0.7.6", +} diff --git a/ovrsource_aten_gen_defs.bzl b/ovrsource_aten_gen_defs.bzl new file mode 100644 index 000000000000..0a56c32e579a --- /dev/null +++ b/ovrsource_aten_gen_defs.bzl @@ -0,0 +1,83 @@ +# @nolint +load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header") +load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library") +load( + "@fbsource//xplat/caffe2:pt_defs.bzl", + "gen_aten_files", + "get_aten_codegen_extra_params", +) + +def define_aten_gen(): + backends = [ + "CPU", + "SparseCPU", + "SparseCsrCPU", + # "MkldnnCPU", + "CUDA", + "SparseCUDA", + "SparseCsrCUDA", + "QuantizedCPU", + "QuantizedCUDA", + "Meta", + "ZeroTensor" + ] + + gen_aten_files( + name = "gen_aten_ovrsource", + extra_flags = get_aten_codegen_extra_params(backends), + visibility = ["PUBLIC"], + ) + + oxx_static_library( + name = "ovrsource_aten_generated_cuda_headers", + header_namespace = "ATen", + public_generated_headers = { + "CUDAFunctions.h": ":gen_aten_ovrsource[CUDAFunctions.h]", + "CUDAFunctions_inl.h": ":gen_aten_ovrsource[CUDAFunctions_inl.h]", + }, + visibility = ["PUBLIC"], + ) + + oxx_static_library( + name = "ovrsource_aten_generated_meta_headers", + header_namespace = "ATen", + public_generated_headers = { + "MetaFunctions.h": ":gen_aten_ovrsource[MetaFunctions.h]", + "MetaFunctions_inl.h": ":gen_aten_ovrsource[MetaFunctions_inl.h]", + }, + visibility = ["PUBLIC"], + ) + + gen_cmake_header( + src = "aten/src/ATen/Config.h.in", + defines = [ + ("@AT_MKLDNN_ENABLED@", "0"), + ("@AT_MKL_ENABLED@", "0"), + ("@AT_MKL_SEQUENTIAL@", "0"), + ("@AT_FFTW_ENABLED@", "0"), + ("@AT_NNPACK_ENABLED@", "0"), + ("@AT_PARALLEL_OPENMP@", "0"), + ("@AT_PARALLEL_NATIVE@", "1"), + ("@AT_PARALLEL_NATIVE_TBB@", "0"), + ("@AT_POCKETFFT_ENABLED@", "0"), + ("@CAFFE2_STATIC_LINK_CUDA_INT@", "1"), + ("@AT_BUILD_WITH_BLAS@", "1"), + ("@AT_BUILD_WITH_LAPACK@", "1"), + ("@AT_BLAS_F2C@", "1"), + ("@AT_BLAS_USE_CBLAS_DOT@", "0") + ], + header = "ATen/Config.h", + prefix = "ovrsource_aten_", + ) + + gen_cmake_header( + src = "aten/src/ATen/cuda/CUDAConfig.h.in", + defines = [ + ("@AT_CUDNN_ENABLED@", "1"), + ("@AT_ROCM_ENABLED@", "0"), + ("@NVCC_FLAGS_EXTRA@", " "), + ("@AT_MAGMA_ENABLED@", "0") + ], + header = "ATen/cuda/CUDAConfig.h", + prefix = "ovrsource_aten_", + ) diff --git a/ovrsource_caffe2_perfkernels_defs.bzl b/ovrsource_caffe2_perfkernels_defs.bzl new file mode 100644 index 000000000000..bcfeb6490a01 --- /dev/null +++ b/ovrsource_caffe2_perfkernels_defs.bzl @@ -0,0 +1,87 @@ +# @nolint +load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library") +load("@fbsource//xplat/caffe2/c10:ovrsource_defs.bzl", "cpu_supported_platforms") + +def define_caffe2_perfkernels(): + [ + oxx_static_library( + name = "perfkernels_{}_ovrsource".format(arch), + srcs = native.glob(["caffe2/perfkernels/*_{}.cc".format(arch)]), + compatible_with = ["ovr_config//cpu:x86_64"], + compiler_flags = select({ + "DEFAULT": [], + "ovr_config//compiler:cl": [ + "/arch:AVX2", + "/w", + ], + "ovr_config//compiler:clang": [ + "-Wno-error", + "-mf16c", + ] + (["-mf16c", "-mavx"] if arch == "avx" else ["-mfma", "-mavx2"] if arch == "avx2" else ["-mavx512f"]), + }), + raw_headers = native.glob([ + "caffe2/core/*.h", + "caffe2/perfkernels/*.h", + "caffe2/proto/*.h", + "caffe2/utils/*.h", + ], exclude = [ + "caffe2/core/macros.h", + ]), + reexport_all_header_dependencies = False, + deps = [ + 
":caffe2_proto_ovrsource", + ":ovrsource_caffe2_macros.h", + "@fbsource//xplat/caffe2/c10:c10_ovrsource", + ], + ) + for arch in ["avx", "avx2", "avx512"] + ] + + oxx_static_library( + name = "perfkernels_ovrsource", + srcs = native.glob([ + "caffe2/perfkernels/*.cc", + ], exclude = [ + "**/*_avx*", + ]), + compatible_with = cpu_supported_platforms, + compiler_flags = select({ + "DEFAULT": [], + "ovr_config//compiler:cl": [ + "/w", + ], + "ovr_config//compiler:clang": [ + "-Wno-macro-redefined", + "-Wno-shadow", + "-Wno-undef", + "-Wno-unused-function", + "-Wno-unused-local-typedef", + "-Wno-unused-variable", + ], + }), + public_include_directories = [], + public_raw_headers = native.glob([ + "caffe2/perfkernels/*.h", + ]), + raw_headers = native.glob([ + "caffe2/core/*.h", + "caffe2/proto/*.h", + "caffe2/utils/*.h", + ], exclude = [ + "caffe2/core/macros.h", + ]), + reexport_all_header_dependencies = False, + deps = [ + ":caffe2_proto_ovrsource", + ":ovrsource_caffe2_macros.h", + "//third-party/cpuinfo:cpuinfo", + "@fbsource//xplat/caffe2/c10:c10_ovrsource", + "//third-party/protobuf:libprotobuf", + ] + select({ + "DEFAULT": [], + "ovr_config//cpu:x86_64": [ + ":perfkernels_avx_ovrsource", + ":perfkernels_avx2_ovrsource", + ], + }), + ) diff --git a/ovrsource_caffe2_proto_defs.bzl b/ovrsource_caffe2_proto_defs.bzl new file mode 100644 index 000000000000..579e807dcf20 --- /dev/null +++ b/ovrsource_caffe2_proto_defs.bzl @@ -0,0 +1,20 @@ +# @nolint +load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library", "oxx_test") +load("//arvr/tools/build_defs:oxx_python.bzl", "oxx_python_binary", "oxx_python_library") +load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header") +load("//arvr/tools/build_defs:protobuf.bzl", "proto_cxx_library") +load("@bazel_skylib//lib:paths.bzl", "paths") + +def define_caffe2_proto(): + proto_cxx_library( + name = "caffe2_proto_ovrsource", + protos = [ + "caffe2/proto/caffe2.proto", + "caffe2/proto/caffe2_legacy.proto", + "caffe2/proto/hsm.proto", + "caffe2/proto/metanet.proto", + "caffe2/proto/predictor_consts.proto", + "caffe2/proto/prof_dag.proto", + "caffe2/proto/torch.proto", + ], + ) diff --git a/ovrsource_nomnigraph_defs.bzl b/ovrsource_nomnigraph_defs.bzl new file mode 100644 index 000000000000..2a378f231230 --- /dev/null +++ b/ovrsource_nomnigraph_defs.bzl @@ -0,0 +1,101 @@ +# @nolint +load("//arvr/tools/build_defs:oxx.bzl", "oxx_static_library", "oxx_test") +load("//arvr/tools/build_defs:oxx_python.bzl", "oxx_python_binary", "oxx_python_library") +load("//arvr/tools/build_defs:genrule_utils.bzl", "gen_cmake_header") +load("@bazel_skylib//lib:paths.bzl", "paths") + +def define_nomnigraph(): + oxx_python_binary( + name = "nomnigraph_gen_py_ovrsource", + main_module = "caffe2.core.nomnigraph.op_gen", + deps = [":nomnigraph_gen_py_main_ovrsource"], + ) + + oxx_python_library( + name = "nomnigraph_gen_py_main_ovrsource", + srcs = native.glob(["caffe2/core/nomnigraph/*.py"]), + base_module = "", + ) + + nomnigraph_gen_py_cmd = " ".join([ + "--install_dir=$OUT", + "--source_def=caffe2/core/nomnigraph/ops.def", + # "--source_def=caffe2/core/nomnigraph/fb/ops.def", + ]) + + native.genrule( + name = "nomnigraph_gen_ovrsource", + srcs = [ + # "caffe2/core/nomnigraph/fb/ops.def", + "caffe2/core/nomnigraph/op_gen.py", + "caffe2/core/nomnigraph/ops.def", + ], + cmd_exe = "mkdir $OUT && $(exe :nomnigraph_gen_py_ovrsource) " + nomnigraph_gen_py_cmd, + out = "gen", + ) + + TEST_SRCS = native.glob([ + "caffe2/core/nomnigraph/tests/*.cc", + ], exclude = [ + 
"caffe2/core/nomnigraph/tests/GraphTest.cc", # fails because debug iterator check + ]) + + oxx_static_library( + name = "nomnigraph_ovrsource", + srcs = [ + "caffe2/core/nomnigraph/Representations/NeuralNet.cc", + ], + compiler_flags = select({ + "ovr_config//compiler:clang": [ + "-Wno-undef", + "-Wno-shadow", + "-Wno-macro-redefined", + "-Wno-unused-variable", + "-Wno-unused-local-typedef", + "-Wno-unused-function", + ], + "DEFAULT": [], + }), + public_include_directories = ["caffe2/core/nomnigraph/include"], + public_raw_headers = native.glob([ + "caffe2/core/nomnigraph/include/**/*.h", + ]), + raw_headers = ["caffe2/core/common.h"], + reexport_all_header_dependencies = False, + tests = [ + ":" + paths.basename(filename)[:-len(".cc")] + "_ovrsource" + for filename in TEST_SRCS + ], + deps = [ + ":ovrsource_caffe2_macros.h", + "@fbsource//xplat/caffe2/c10:c10_ovrsource", + ], + ) + + [ + oxx_test( + name = paths.basename(filename)[:-len(".cc")] + "_ovrsource", + srcs = [ + filename, + "caffe2/core/nomnigraph/tests/test_util.cc", + ], + compiler_flags = select({ + "ovr_config//compiler:clang": [ + "-Wno-macro-redefined", + "-Wno-shadow", + "-Wno-undef", + "-Wno-unused-variable", + ], + "DEFAULT": [], + }), + framework = "gtest", + oncall = "frl_gemini", + raw_headers = native.glob([ + "caffe2/core/nomnigraph/tests/*.h", + ]), + deps = [ + ":nomnigraph_ovrsource", + ], + ) + for filename in TEST_SRCS + ] diff --git a/pt_template_srcs.bzl b/pt_template_srcs.bzl new file mode 100644 index 000000000000..b2cbf6cbf5e9 --- /dev/null +++ b/pt_template_srcs.bzl @@ -0,0 +1,239 @@ +# This file keeps a list of PyTorch source files that are used for templated selective build. +# NB: as this is PyTorch Edge selective build, we assume only CPU targets are +# being built + +load("@bazel_skylib//lib:paths.bzl", "paths") +load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_arvr_mode") +load(":build_variables.bzl", "aten_native_source_list") +load( + ":ufunc_defs.bzl", + "aten_ufunc_generated_cpu_kernel_sources", + "aten_ufunc_generated_cpu_sources", +) + +# Files in this list are supposed to be built separately for each app, +# for different operator allow lists. 
+TEMPLATE_SOURCE_LIST = [ + "torch/csrc/jit/runtime/register_prim_ops.cpp", + "torch/csrc/jit/runtime/register_special_ops.cpp", +] + aten_native_source_list + +# For selective build, we can lump the CPU and CPU kernel sources altogether +# because there is only ever one vectorization variant that is compiled +def aten_ufunc_generated_all_cpu_sources(gencode_pattern = "{}"): + return ( + aten_ufunc_generated_cpu_sources(gencode_pattern) + + aten_ufunc_generated_cpu_kernel_sources(gencode_pattern) + ) + +TEMPLATE_MASKRCNN_SOURCE_LIST = [ + "register_maskrcnn_ops.cpp", +] + +TEMPLATE_BATCH_BOX_COX_SOURCE_LIST = [ + "register_batch_box_cox_ops.cpp", +] + +METAL_SOURCE_LIST = [ + "aten/src/ATen/native/metal/MetalAten.mm", + "aten/src/ATen/native/metal/MetalGuardImpl.cpp", + "aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp", + "aten/src/ATen/native/metal/MetalCommandBuffer.mm", + "aten/src/ATen/native/metal/MetalContext.mm", + "aten/src/ATen/native/metal/MetalConvParams.mm", + "aten/src/ATen/native/metal/MetalTensorImplStorage.mm", + "aten/src/ATen/native/metal/MetalTensorUtils.mm", + "aten/src/ATen/native/metal/mpscnn/MPSCNNClampOp.mm", + "aten/src/ATen/native/metal/mpscnn/MPSCNNConvOp.mm", + "aten/src/ATen/native/metal/mpscnn/MPSCNNFullyConnectedOp.mm", + "aten/src/ATen/native/metal/mpscnn/MPSCNNNeuronOp.mm", + "aten/src/ATen/native/metal/mpscnn/MPSCNNUtils.mm", + "aten/src/ATen/native/metal/mpscnn/MPSImage+Tensor.mm", + "aten/src/ATen/native/metal/mpscnn/MPSImageUtils.mm", + "aten/src/ATen/native/metal/mpscnn/MPSImageWrapper.mm", + "aten/src/ATen/native/metal/ops/MetalAddmm.mm", + "aten/src/ATen/native/metal/ops/MetalBinaryElementwise.mm", + "aten/src/ATen/native/metal/ops/MetalChunk.mm", + "aten/src/ATen/native/metal/ops/MetalClamp.mm", + "aten/src/ATen/native/metal/ops/MetalConcat.mm", + "aten/src/ATen/native/metal/ops/MetalConvolution.mm", + "aten/src/ATen/native/metal/ops/MetalCopy.mm", + "aten/src/ATen/native/metal/ops/MetalHardswish.mm", + "aten/src/ATen/native/metal/ops/MetalLeakyReLU.mm", + "aten/src/ATen/native/metal/ops/MetalNeurons.mm", + "aten/src/ATen/native/metal/ops/MetalPadding.mm", + "aten/src/ATen/native/metal/ops/MetalPooling.mm", + "aten/src/ATen/native/metal/ops/MetalReduce.mm", + "aten/src/ATen/native/metal/ops/MetalReshape.mm", + "aten/src/ATen/native/metal/ops/MetalSoftmax.mm", + "aten/src/ATen/native/metal/ops/MetalTranspose.mm", + "aten/src/ATen/native/metal/ops/MetalUpsamplingNearest.mm", +] + +UNET_METAL_PREPACK_SOURCE_LIST = [ + "unet_metal_prepack.cpp", + "unet_metal_prepack.mm", +] + +METAL_MASKRCNN_SOURCE_LIST = [ + "maskrcnn/srcs/GenerateProposals.mm", + "maskrcnn/srcs/RoIAlign.mm", +] + +# The get_template_source_dict() returns a dict containing a path prefix +# and a list of .cpp source files containing operator definitions and +# registrations that should get selected via templated selective build. +# The file selected_mobile_ops.h has the list of selected top level +# operators. 
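+# A hypothetical slice of the returned dict, for illustration only:
+#   {"torch/csrc/jit/runtime": ["torch/csrc/jit/runtime/register_prim_ops.cpp",
+#                               "torch/csrc/jit/runtime/register_special_ops.cpp"], ...}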
+# NB: doesn't include generated files; copy_template_registration_files +# handles those specially +def get_template_source_dict(): + ret = {} + for file_path in TEMPLATE_SOURCE_LIST: + path_prefix = paths.dirname(file_path) + if path_prefix not in ret: + ret[path_prefix] = [] + ret[path_prefix].append(file_path) + return ret + +def get_gen_oplist_outs(): + return { + "SupportedMobileModelsRegistration.cpp": [ + "SupportedMobileModelsRegistration.cpp", + ], + "selected_mobile_ops.h": [ + "selected_mobile_ops.h", + ], + "selected_operators.yaml": [ + "selected_operators.yaml", + ], + } + +def get_generate_code_bin_outs(): + outs = { + "autograd/generated/ADInplaceOrViewTypeEverything.cpp": ["autograd/generated/ADInplaceOrViewTypeEverything.cpp"], + "autograd/generated/ADInplaceOrViewType_0.cpp": ["autograd/generated/ADInplaceOrViewType_0.cpp"], + "autograd/generated/ADInplaceOrViewType_1.cpp": ["autograd/generated/ADInplaceOrViewType_1.cpp"], + "autograd/generated/Functions.cpp": ["autograd/generated/Functions.cpp"], + "autograd/generated/Functions.h": ["autograd/generated/Functions.h"], + "autograd/generated/TraceTypeEverything.cpp": ["autograd/generated/TraceTypeEverything.cpp"], + "autograd/generated/TraceType_0.cpp": ["autograd/generated/TraceType_0.cpp"], + "autograd/generated/TraceType_1.cpp": ["autograd/generated/TraceType_1.cpp"], + "autograd/generated/TraceType_2.cpp": ["autograd/generated/TraceType_2.cpp"], + "autograd/generated/TraceType_3.cpp": ["autograd/generated/TraceType_3.cpp"], + "autograd/generated/TraceType_4.cpp": ["autograd/generated/TraceType_4.cpp"], + "autograd/generated/VariableType.h": ["autograd/generated/VariableType.h"], + "autograd/generated/VariableTypeEverything.cpp": ["autograd/generated/VariableTypeEverything.cpp"], + "autograd/generated/VariableType_0.cpp": ["autograd/generated/VariableType_0.cpp"], + "autograd/generated/VariableType_1.cpp": ["autograd/generated/VariableType_1.cpp"], + "autograd/generated/VariableType_2.cpp": ["autograd/generated/VariableType_2.cpp"], + "autograd/generated/VariableType_3.cpp": ["autograd/generated/VariableType_3.cpp"], + "autograd/generated/VariableType_4.cpp": ["autograd/generated/VariableType_4.cpp"], + "autograd/generated/variable_factories.h": ["autograd/generated/variable_factories.h"], + } + + if is_arvr_mode(): + outs.update({ + "autograd/generated/python_fft_functions.cpp": ["autograd/generated/python_fft_functions.cpp"], + "autograd/generated/python_functions.h": ["autograd/generated/python_functions.h"], + "autograd/generated/python_functions_0.cpp": ["autograd/generated/python_functions_0.cpp"], + "autograd/generated/python_functions_1.cpp": ["autograd/generated/python_functions_1.cpp"], + "autograd/generated/python_functions_2.cpp": ["autograd/generated/python_functions_2.cpp"], + "autograd/generated/python_functions_3.cpp": ["autograd/generated/python_functions_3.cpp"], + "autograd/generated/python_functions_4.cpp": ["autograd/generated/python_functions_4.cpp"], + "autograd/generated/python_linalg_functions.cpp": ["autograd/generated/python_linalg_functions.cpp"], + "autograd/generated/python_nn_functions.cpp": ["autograd/generated/python_nn_functions.cpp"], + "autograd/generated/python_return_types.cpp": ["autograd/generated/python_return_types.cpp"], + "autograd/generated/python_sparse_functions.cpp": ["autograd/generated/python_sparse_functions.cpp"], + "autograd/generated/python_special_functions.cpp": ["autograd/generated/python_special_functions.cpp"], + 
"autograd/generated/python_torch_functions_0.cpp": ["autograd/generated/python_torch_functions_0.cpp"], + "autograd/generated/python_torch_functions_1.cpp": ["autograd/generated/python_torch_functions_1.cpp"], + "autograd/generated/python_torch_functions_2.cpp": ["autograd/generated/python_torch_functions_2.cpp"], + "autograd/generated/python_variable_methods.cpp": ["autograd/generated/python_variable_methods.cpp"], + }) + return outs + +def get_template_registration_files_outs(): + outs = {} + for file_path in TEMPLATE_MASKRCNN_SOURCE_LIST: + outs[file_path] = [file_path] + + for file_path in TEMPLATE_BATCH_BOX_COX_SOURCE_LIST: + outs[file_path] = [file_path] + + for file_path in TEMPLATE_SOURCE_LIST: + outs[file_path] = [file_path] + + for base_name in aten_ufunc_generated_all_cpu_sources(): + file_path = "aten/src/ATen/{}".format(base_name) + outs[file_path] = [file_path] + + return outs + +def get_template_registration_file_rules(rule_name): + rules = [] + for file_path in TEMPLATE_SOURCE_LIST + TEMPLATE_MASKRCNN_SOURCE_LIST + TEMPLATE_BATCH_BOX_COX_SOURCE_LIST: + rules.append(":{}[{}]".format(rule_name, file_path)) + for file_path in aten_ufunc_generated_all_cpu_sources(): + rules.append(":{}[aten/src/ATen/{}]".format(rule_name, file_path)) + + return rules + +# ---------------------METAL RULES--------------------- +def get_metal_source_dict(): + ret = {} + for file_path in METAL_SOURCE_LIST: + path_prefix = paths.dirname(file_path) + if path_prefix not in ret: + ret[path_prefix] = [] + ret[path_prefix].append(file_path) + return ret + +def get_metal_registration_files_outs(): + outs = {} + for file_path in METAL_SOURCE_LIST: + outs[file_path] = [file_path] + + for file_path in UNET_METAL_PREPACK_SOURCE_LIST: + outs[file_path] = [file_path] + + for file_path in METAL_MASKRCNN_SOURCE_LIST: + outs[file_path] = [file_path] + return outs + +# There is a really weird issue with the arvr windows builds where +# the custom op files are breaking them. See https://fburl.com/za87443c +# The hack is just to not build them for that platform and pray they arent needed. 
+def get_metal_registration_files_outs_windows(): + outs = {} + for file_path in METAL_SOURCE_LIST: + outs[file_path] = [file_path] + return outs + +def get_metal_registration_files_rules(rule_name): + ret = {} + objc_rules = [] + cxx_rules = [] + + for file_path in METAL_SOURCE_LIST + METAL_MASKRCNN_SOURCE_LIST + UNET_METAL_PREPACK_SOURCE_LIST: + if ".cpp" not in file_path: + objc_rules.append(":{}[{}]".format(rule_name, file_path)) + else: + cxx_rules.append(":{}[{}]".format(rule_name, file_path)) + ret["objc"] = objc_rules + ret["cxx"] = cxx_rules + return ret + +def get_metal_registration_files_rules_windows(rule_name): + ret = {} + objc_rules = [] + cxx_rules = [] + + for file_path in METAL_SOURCE_LIST: + if ".cpp" not in file_path: + objc_rules.append(":{}[{}]".format(rule_name, file_path)) + else: + cxx_rules.append(":{}[{}]".format(rule_name, file_path)) + ret["objc"] = objc_rules + ret["cxx"] = cxx_rules + return ret diff --git a/test/defs.bzl b/test/defs.bzl new file mode 100644 index 000000000000..0e92326402dd --- /dev/null +++ b/test/defs.bzl @@ -0,0 +1,112 @@ +load("@fbcode_macros//build_defs:python_pytest.bzl", "python_pytest") +load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest") +load("@fbsource//tools/build_defs/sandcastle:sandcastle_defs.bzl", "is_sandcastle_machine") + +def define_python_unittest(pytest = False, **kwargs): + build_mode = native.read_config("fbcode", "build_mode_test_label") + enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None)) + + PYTORCH_TEST_WITH_ASAN = "1" if ("asan" in build_mode or build_mode == "dev") else "0" + + PYTORCH_TEST_WITH_DEV_DBG_ASAN = "1" if (build_mode == "dev" or "dev-asan" in build_mode or "dbg-asan" in build_mode or "dbgo-asan" in build_mode) else "0" + + PYTORCH_TEST_WITH_TSAN = "1" if ("tsan" in build_mode) else "0" + + PYTORCH_TEST_WITH_UBSAN = "1" if ("ubsan" in build_mode or build_mode == "dev") else "0" + + NO_MULTIPROCESSING_SPAWN = "1" if is_sandcastle_machine() else "0" + + ENABLE_FLATBUFFER = "1" if enable_flatbuffer else "0" + + # indicates we are running in test env. + # "deepcopy" the 'env: Dict[str, str]' + kwargs["env"] = dict(kwargs.get("env", {})) + kwargs["env"]["PYTORCH_TEST"] = "1" + kwargs["env"]["PYTORCH_TEST_FBCODE"] = "1" + kwargs["env"]["PYTORCH_TEST_WITH_ASAN"] = PYTORCH_TEST_WITH_ASAN + kwargs["env"]["PYTORCH_TEST_WITH_DEV_DBG_ASAN"] = PYTORCH_TEST_WITH_DEV_DBG_ASAN + kwargs["env"]["PYTORCH_TEST_WITH_TSAN"] = PYTORCH_TEST_WITH_TSAN + kwargs["env"]["PYTORCH_TEST_WITH_UBSAN"] = PYTORCH_TEST_WITH_UBSAN + kwargs["env"]["NO_MULTIPROCESSING_SPAWN"] = NO_MULTIPROCESSING_SPAWN + kwargs["env"]["ENABLE_FLATBUFFER"] = ENABLE_FLATBUFFER + + # To speed up TP tests. + kwargs["env"]["TENSORPIPE_TLS_DATACENTER"] = "test_dc" + + # Run CUDA tests on GPUs + if kwargs.get("name").endswith("cuda"): + # "deepcopy" the 'tags: List[str]' + kwargs["tags"] = list(kwargs.get("tags", [])) + kwargs["tags"].extend([ + "re_opts_capabilities={\"platform\": \"gpu-remote-execution\", \"subplatform\": \"P100\"}", + "supports_remote_execution", + "run_as_bundle", + "tpx:experimental-shard-size-for-bundle=100", + ]) + kwargs["env"]["PYTORCH_TEST_REMOTE_GPU"] = "1" + + if pytest: + python_pytest( + **kwargs + ) + else: + python_unittest( + **kwargs + ) + +def define_mp_tests(tests, additional_deps = None, pytest = False, **kwargs): + # LeakSanitizer doesn't work for python multiprocessing. 
+ # See https://fb.workplace.com/groups/fbcode/posts/2625521060818050/ + # and https://fb.workplace.com/groups/101100140348621/posts/1278688645923092/ + extra_env = { + "ASAN_OPTIONS": "detect_leaks=0", + "CUDA_INJECTION64_PATH": "0", # resolve kineto TSAN flakiness + } + + # Serialize test cases since multiple tests running on same GPUs can + # deadlock or there can be port conflicts. + if "tags" not in kwargs: + kwargs["tags"] = [] + if "serialize_test_cases" not in kwargs["tags"]: + kwargs["tags"].append("serialize_test_cases") + define_tests(tests, additional_deps, pytest, extra_env, **kwargs) + +def define_q_distributed_test(tests, env = None, additional_deps = None, pytest = False, **kwargs): + define_tests(tests, additional_deps, pytest, env, **kwargs) + +def define_tests(tests, additional_deps = None, pytest = False, extra_env = {}, **kwargs): + if additional_deps == None: + additional_deps = {} + + provided_tags = kwargs.pop("tags", []) + + env = { + "DOCS_SRC_DIR": "$(location //caffe2/docs/source:doc_files)", + "MKL_NUM_THREADS": "1", + "OMP_NUM_THREADS": "1", + "SKIP_TEST_BOTTLENECK": "1", + } + env.update(extra_env) + for name, srcs in tests.items(): + tags = list(provided_tags) + + test_deps = ["//caffe2:test-lib"] + additional_deps.get(name, []) + define_python_unittest( + pytest, + name = name, + srcs = srcs, + base_module = "", + compile = "with-source", + env = env, + py_version = ">=3.5", + strip_libpar = True, + tags = tags, + deps = test_deps, + # Depend directly on :libtorch so that tests won't be pruned by the + # rdep distance heuristic. + cpp_deps = ["//caffe2:libtorch"], + runtime_deps = [ + "//caffe2/docs/source:doc_files", + ], + **kwargs + ) diff --git a/test/distributed/defs.bzl b/test/distributed/defs.bzl new file mode 100644 index 000000000000..d3b3040ea4c3 --- /dev/null +++ b/test/distributed/defs.bzl @@ -0,0 +1,39 @@ +load("@fbsource//tools/build_defs:testpilot_defs.bzl", "special_tags") +load( + "//caffe2/test:defs.bzl", + "define_python_unittest", +) + +# These distributed tests need custom environment variables +def define_distributed_test(**kwargs): + # LeakSanitizer doesn't work for python multiprocessing. 
+ # See https://fb.workplace.com/groups/fbcode/posts/2625521060818050/ + # and https://fb.workplace.com/groups/101100140348621/posts/1278688645923092/ + kwargs["env"]["ASAN_OPTIONS"] = "detect_leaks=0" + + # Resolve kineto TSAN flakiness + kwargs["env"]["CUDA_INJECTION64_PATH"] = "0" + define_python_unittest( + base_module = "", + main_module = "fb.test_distributed_trap", + py_version = ">=3.5", + tags = [special_tags.run_as_bundle], + deps = [ + "//caffe2:test-lib", + "//caffe2:torch", + "//caffe2/torch/fb/rendezvous:zeus", + "//pytorch/vision:torchvision", + ], + external_deps = [ + ("numpy", None), + ("scipy", None), + ], + **kwargs + ) + +def define_c10d_distributed_test(srcs, **kwargs): + srcs.extend(["fb/test_distributed_trap.py"]) + define_distributed_test( + srcs = srcs + native.glob(["data/*.py"]), + **kwargs + ) diff --git a/test/distributed/fsdp/defs.bzl b/test/distributed/fsdp/defs.bzl new file mode 100644 index 000000000000..2e496838c807 --- /dev/null +++ b/test/distributed/fsdp/defs.bzl @@ -0,0 +1,22 @@ +load("@bazel_skylib//lib:paths.bzl", "paths") +load( + "//caffe2/test:defs.bzl", + "define_mp_tests", +) + +def define_fsdp_tests(): + test_files = native.glob(["**/test_*.py"]) + + TESTS = {} + + additional_deps = {} + for test_file in test_files: + test_file_name = paths.basename(test_file) + test_name = test_file_name.replace("test_", "").replace(".py", "") + TESTS[test_name] = [test_file] + additional_deps[test_name] = ["//pytorch/vision:torchvision"] + + define_mp_tests( + tests = TESTS, + additional_deps = additional_deps, + ) diff --git a/test/distributed/pipeline/sync/defs.bzl b/test/distributed/pipeline/sync/defs.bzl new file mode 100644 index 000000000000..0de277bddaef --- /dev/null +++ b/test/distributed/pipeline/sync/defs.bzl @@ -0,0 +1,22 @@ +load("@bazel_skylib//lib:paths.bzl", "paths") +load( + "//caffe2/test:defs.bzl", + "define_tests", +) + +def define_pipeline_tests(): + test_files = native.glob(["**/test_*.py"]) + + TESTS = {} + + for test_file in test_files: + test_file_name = paths.basename(test_file) + test_name = test_file_name.replace("test_", "").replace(".py", "") + TESTS[test_name] = [test_file] + + define_tests( + pytest = True, + tests = TESTS, + external_deps = [("pytest", None)], + resources = ["conftest.py"], + ) diff --git a/third_party/tensorflow_cuda_bazel_build/cuda/build_defs.bzl b/third_party/tensorflow_cuda_bazel_build/cuda/build_defs.bzl new file mode 100755 index 000000000000..a394b6ce9204 --- /dev/null +++ b/third_party/tensorflow_cuda_bazel_build/cuda/build_defs.bzl @@ -0,0 +1,31 @@ +# Macros for building CUDA code. +def if_cuda(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with CUDA. + + Returns a select statement which evaluates to if_true if we're building + with CUDA enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + "@local_config_cuda//cuda:using_clang": if_true, + "@local_config_cuda//cuda:using_nvcc": if_true, + "//conditions:default": if_false, + }) + +def cuda_default_copts(): + """Default options for all CUDA compilations.""" + return if_cuda(["-x", "cuda", "-DGOOGLE_CUDA=1"] + []) + +def cuda_is_configured(): + """Returns true if CUDA was enabled during the configure process.""" + return True + +def if_cuda_is_configured(x): + """Tests if the CUDA was enabled during the configure process. + + Unlike if_cuda(), this does not require that we are building with + --config=cuda. Used to allow non-CUDA code to depend on CUDA libraries. 
+ """ + if cuda_is_configured(): + return x + return [] diff --git a/tools/cpuinfo_target_definition.bzl b/tools/cpuinfo_target_definition.bzl new file mode 100644 index 000000000000..27b1c7bb272d --- /dev/null +++ b/tools/cpuinfo_target_definition.bzl @@ -0,0 +1,12 @@ +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx") + +def add_cpuinfo_lib(): + cpp_library( + name = "cpuinfo", + exported_deps = [ + "fbsource//third-party/cpuinfo_sgx:cpuinfo_coffeelake", + ] if is_sgx else [ + "fbsource//third-party/cpuinfo:cpuinfo", + ], + ) diff --git a/tools/miniz_target_definition.bzl b/tools/miniz_target_definition.bzl new file mode 100644 index 000000000000..7040ff6beaa1 --- /dev/null +++ b/tools/miniz_target_definition.bzl @@ -0,0 +1,25 @@ +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx") + +def add_miniz_lib(): + cpp_library( + name = "miniz", + srcs = [ + "third_party/miniz-2.0.8/fb/FollyCrcPlugin.cpp", + "third_party/miniz-2.0.8/fb/miniz-fb.c", + ], + headers = { + "caffe2/third_party/miniz-2.0.8/miniz.c": "third_party/miniz-2.0.8/miniz.c", + "miniz-fb.h": "third_party/miniz-2.0.8/fb/miniz-fb.h", + "miniz.h": "third_party/miniz-2.0.8/miniz.h", + }, + header_namespace = "", + # -fexceptions is required, otherwise, when we use @mode/opt-clang-thinlto, + # c functions become noexcept, and we may not be able to catch exceptions + # during model loading. + compiler_flags = ["-DUSE_EXTERNAL_MZCRC", "-fexceptions"] + (["-DMINIZ_NO_STDIO"] if is_sgx else []), + # folly is only required as a dependency if USE_EXTERNAL_MZCRC + # above is defined, and FollyCrcPlugin.cpp is added. + # Neither are strictly needed, but run significantly faster. 
+ exported_deps = ["//folly/hash:checksum"], + ) diff --git a/tools/perf_kernel_defs.bzl b/tools/perf_kernel_defs.bzl new file mode 100644 index 000000000000..2a699840c8bf --- /dev/null +++ b/tools/perf_kernel_defs.bzl @@ -0,0 +1,54 @@ +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") + +is_dbg_build = native.read_config("fbcode", "build_mode", "").find("dbg") != -1 +is_sanitizer = native.read_config("fbcode", "sanitizer", "") != "" + +def define_perf_kernels(prefix, levels_and_flags, compiler_common_flags, dependencies, external_deps): + vectorize_flags = ([ + # "-Rpass=loop-vectorize", # Add vectorization information to output + "-DENABLE_VECTORIZATION=1", + "-fveclib=SVML", + ] if not is_dbg_build and not is_sanitizer else []) + + compiler_specific_flags = { + "clang": vectorize_flags, + "gcc": [], + } + + compiler_specific_flags["clang"] += ["-Wno-pass-failed"] + + common_srcs = native.glob( + ["**/*.cc"], + exclude = [ + "**/*_avx512.cc", + "**/*_avx2.cc", + "**/*_avx.cc", + ], + ) + + cpp_headers = native.glob( + ["**/*.h"], + ) + + kernel_targets = [] + for level, flags in levels_and_flags: + cpp_library( + name = prefix + "perfkernels_" + level, + srcs = native.glob(["**/*_" + level + ".cc"]), + headers = cpp_headers, + compiler_flags = compiler_common_flags + flags, + compiler_specific_flags = compiler_specific_flags, + exported_deps = dependencies, + exported_external_deps = external_deps, + ) + kernel_targets.append(":" + prefix + "perfkernels_" + level) + + cpp_library( + name = prefix + "perfkernels", + srcs = common_srcs, + headers = cpp_headers, + compiler_flags = compiler_common_flags, + compiler_specific_flags = compiler_specific_flags, + link_whole = True, + exported_deps = kernel_targets + dependencies, + ) diff --git a/tools/rules/METADATA.bzl b/tools/rules/METADATA.bzl new file mode 100644 index 000000000000..a1e9c277630c --- /dev/null +++ b/tools/rules/METADATA.bzl @@ -0,0 +1,9 @@ +# THIS FILE IS AUTOMATICALLY GENERATED FROM INFORMATION STORED IN +# THIRD-PARTY METADATA SERVICE. YOUR MANUAL CHANGES TO THIS FILE WILL +# BE PRESERVED AND WILL SERVE AS THE SOURCE OF TRUTH FOR METADATA OF +# THIS PACKAGE. 
+# TPMS-GENERATED: b3448f8fd2a893772f944f37627e63917b77dede +METADATA = { + "name": "rules", + "owner": "pytorch_dev_infra", +} diff --git a/tools/sgx_aten_target_definitions.bzl b/tools/sgx_aten_target_definitions.bzl new file mode 100644 index 000000000000..48886ae16fe2 --- /dev/null +++ b/tools/sgx_aten_target_definitions.bzl @@ -0,0 +1,261 @@ +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule") +load("//caffe2:build.bzl", "GENERATED_CPP") +load("//caffe2:build_variables.bzl", "jit_core_headers", "jit_core_sources") +load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx") + +default_compiler_flags = [ + "-Wno-error=strict-aliasing", + "-Wno-unused-local-typedefs", + "-Wno-shadow-compatible-local", + "-Wno-maybe-uninitialized", # aten is built with gcc as part of HHVM + "-Wno-unknown-pragmas", + "-Wno-strict-overflow", + # See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/ + # These trigger on platform007 + "-Wno-stringop-overflow", + "-Wno-class-memaccess", + "-DHAVE_MMAP", + "-DUSE_GCC_ATOMICS=1", + "-D_FILE_OFFSET_BITS=64", + "-DHAVE_SHM_OPEN=1", + "-DHAVE_SHM_UNLINK=1", + "-DHAVE_MALLOC_USABLE_SIZE=1", + "-DTH_HAVE_THREAD", + "-DCPU_CAPABILITY_DEFAULT", + "-DTH_INDEX_BASE=0", + "-DMAGMA_V2", + "-DNO_CUDNN_DESTROY_HANDLE", + "-DUSE_QNNPACK", + "-DUSE_PYTORCH_QNNPACK", + # The dynamically loaded NVRTC trick doesn't work in fbcode, + # and it's not necessary anyway, because we have a stub + # nvrtc library which we load canonically anyway + "-DUSE_DIRECT_NVRTC", + "-DUSE_XNNPACK", + "-Wno-error=uninitialized", +] + +compiler_specific_flags = { + "clang": [ + "-Wno-absolute-value", + "-Wno-pass-failed", + "-Wno-braced-scalar-init", + ], + "gcc": [ + "-Wno-error=array-bounds", + ], +} + +def add_sgx_aten_libs(ATEN_HEADERS_CPU_MKL, ATEN_SRCS_CPU_MKL, ATEN_CORE_CPP): + # we do not need to define these targets if we are in not SGX mode + if not is_sgx: + return + + x64_compiler_flags = [ + "-DUSE_SSE2", + "-DUSE_SSE3", + "-DUSE_SSE4_1", + "-DUSE_SSE4_2", + # dont enable AVX2 because we dont have runtime dispatch + "-DCPU_CAPABILITY_DEFAULT", + "-DCPU_CAPABILITY=DEFAULT", + "-DTH_INDEX_BASE=0", + "-DTH_INDEX_BASE=0", + "-msse", + "-msse2", + "-msse3", + "-msse4", + "-msse4.1", + "-msse4.2", + "-mavx", + "-mavx2", + ] + + cpu_preprocessor_flags = [ + "-DATEN_MKLDNN_ENABLED_FBCODE=0", + "-DATEN_NNPACK_ENABLED_FBCODE=0", + "-DATEN_MKL_ENABLED_FBCODE=0", + "-DAT_BUILD_WITH_BLAS_FBCODE=1", + "-DAT_BLAS_USE_CBLAS_DOT_FBCODE=1", + "-DAT_BLAS_F2C_FBCODE=0", + "-DATEN_CUDNN_ENABLED_FBCODE=1", + "-DATEN_ROCM_ENABLED_FBCODE=0", + "-DC10_MOBILE", + "-DAT_PARALLEL_NATIVE_FBCODE=1", + ] + + custom_rule( + name = "generate-sgx-config", + srcs = [ + "src/ATen/Config.h.in", + ], + build_args = " ".join([ + "--input-file", + "src/ATen/Config.h.in", + "--output-file", + "Config.h", + "--replace", + "@AT_MKLDNN_ENABLED@", + "0", + "--replace", + "@AT_MKL_ENABLED@", + "0", + "--replace", + "@AT_MKL_SEQUENTIAL@", + "0", + "--replace", + "@AT_FFTW_ENABLED@", + "0", + "--replace", + "@AT_POCKETFFT_ENABLED@", + "0", + "--replace", + "@AT_NNPACK_ENABLED@", + "ATEN_NNPACK_ENABLED_FBCODE", + "--replace", + "@AT_BUILD_WITH_BLAS@", + "1", + "--replace", + "@AT_BUILD_WITH_LAPACK@", + "0", + "--replace", + "@CAFFE2_STATIC_LINK_CUDA_INT@", + "0", + "--replace", + "@AT_BLAS_F2C@", + "AT_BLAS_F2C_FBCODE", + "--replace", + "@AT_BLAS_USE_CBLAS_DOT@", + "AT_BLAS_USE_CBLAS_DOT_FBCODE", + "--replace", + "@AT_PARALLEL_OPENMP@", + 
"0", + "--replace", + "@AT_PARALLEL_NATIVE@", + "1", + "--replace", + "@AT_PARALLEL_NATIVE_TBB@", + "0", + ]), + build_script_dep = "//caffe2:substitute", + output_gen_files = ["Config.h"], + ) + + cpp_library( + name = "generated-sgx-config-header", + headers = [":generate-sgx-config=Config.h"], + header_namespace = "ATen", + ) + + ATEN_CORE_H = native.glob([ + "src/ATen/core/*.h", + "src/ATen/core/boxing/*.h", + "src/ATen/core/boxing/impl/*.h", + "src/ATen/core/dispatch/*.h", + "src/ATen/core/op_registration/*.h", + ]) + [ + "src/ATen/CPUGeneratorImpl.h", + "src/ATen/NumericUtils.h", + ] + + cpp_library( + name = "ATen-core-sgx-headers", + headers = ATEN_CORE_H, + propagated_pp_flags = [ + "-Icaffe2/aten/src", + ], + exported_deps = [ + "//caffe2:generated-aten-headers-core", + "//caffe2/c10:c10", + ], + ) + + cpp_library( + name = "ATen-sgx-core", + # Sorry, this is duped with GENERATED_CPP_CORE. I was too lazy to refactor + # the list into a bzl file + srcs = ATEN_CORE_CPP + [ + ":gen_aten=Operators_0.cpp", + ":gen_aten=Operators_1.cpp", + ":gen_aten=Operators_2.cpp", + ":gen_aten=Operators_3.cpp", + ":gen_aten=Operators_4.cpp", + ":gen_aten=core/ATenOpList.cpp", + ":gen_aten=core/TensorMethods.cpp", + ], + headers = native.glob([ + "src/ATen/*.h", + "src/ATen/ops/*.h", + "src/ATen/quantized/*.h", + ]), + compiler_flags = default_compiler_flags, + compiler_specific_flags = compiler_specific_flags, + link_whole = True, + # Tests that fail in CPU static dispatch mode because they require + # the dispatcher in order to work can be gated out with `#ifndef + # ATEN_CPU_STATIC_DISPATCH`. + propagated_pp_flags = [], + # Must be linked with caffe2_core + undefined_symbols = True, + exported_deps = [ + ":ATen-core-sgx-headers", + "//caffe2:jit-core-sgx", + ], + ) + + cpp_library( + name = "ATen-sgx-cpu", + srcs = ATEN_SRCS_CPU_MKL + [":gen_aten=" + x for x in GENERATED_CPP], + headers = ATEN_HEADERS_CPU_MKL, + arch_compiler_flags = {"x86_64": x64_compiler_flags}, + compiler_flags = default_compiler_flags, + compiler_specific_flags = compiler_specific_flags, + include_directories = [ + "src", + "src/TH", + ], + link_whole = True, + propagated_pp_flags = cpu_preprocessor_flags, + exported_deps = [ + "fbsource//third-party/cpuinfo_sgx:cpuinfo_coffeelake", + ":ATen-sgx-core", + ":aten-headers-cpu", + ":generated-aten-headers-cpu", + ":generated-sgx-config-header", + ":generated-sgx-th-general-header", + ":generated-sgx-th-general-header-no-prefix", + "//caffe2/caffe2:caffe2_sgx_core", + "//caffe2/caffe2/perfkernels:sgx_perfkernels", + "//xplat/third-party/XNNPACK:XNNPACK", + ], + exported_external_deps = [ + ("OpenBLAS", None, "OpenBLAS"), + ], + deps = [ + "//caffe2/aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack", + ], + ) + +def add_sgx_aten_jit_libs(): + # we do not need to define these targets if we are in not SGX mode + if not is_sgx: + return + + cpp_library( + name = "jit-core-sgx", + # Sorry, this is duped with GENERATED_CPP_CORE. 
I was too lazy to refactor + # the list into a bzl file + srcs = jit_core_sources, + headers = jit_core_headers, + compiler_flags = default_compiler_flags, + compiler_specific_flags = compiler_specific_flags, + include_directories = [""], + link_whole = True, + # Must be linked with caffe2_core + undefined_symbols = True, + exported_deps = [ + "//caffe2:ATen-core-sgx-headers", + "//caffe2/c10:c10", + ], + ) diff --git a/tools/sgx_caffe2_target_definitions.bzl b/tools/sgx_caffe2_target_definitions.bzl new file mode 100644 index 000000000000..551244fe8c96 --- /dev/null +++ b/tools/sgx_caffe2_target_definitions.bzl @@ -0,0 +1,253 @@ +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("//caffe2/caffe2:defs.bzl", "get_sgx_patterns") +load("//caffe2/tools:perf_kernel_defs.bzl", "define_perf_kernels") +load("//caffe2/tools:sgx_target_definitions.bzl", "is_sgx") + +def add_sgx_caffe_libs(): + # we do not need to define these targets if we are in not SGX mode + if not is_sgx: + return + + core_file_patterns = [ + "core/allocator.cc", + "core/logging.cc", + "core/flags.cc", + "core/common.cc", + "core/context.cc", + "core/event.cc", + "core/context_base.cc", + "core/numa.cc", + "core/blob_serialization.cc", + "core/tensor.cc", + "core/types.cc", + "core/blob_stats.cc", + "opt/converter.cc", + "opt/annotations.cc", + "utils/cpuid.cc", + "utils/threadpool/ThreadPool.cc", + "utils/threadpool/pthreadpool-cpp.cc", + "utils/threadpool/thread_pool_guard.cpp", + "utils/proto_utils.cc", + ] + + core_srcs = native.glob( + core_file_patterns, + ) + + core_external_deps = [ + "protobuf", + "glog", + "sparsehash", + "zstd", + ] + + core_internal_deps = [ + "fbsource//third-party/fmt:fmt", + "//caffe/proto:fb_protobuf", + "//caffe2/caffe2/proto:fb_protobuf", + "//caffe2/c10:c10", + "//common/base:exception", + "//common/logging:logging", + ] + + internal_deps = core_internal_deps + [ + # "//libfb/py/mkl:mkl_dep_handle_lp64", + "//onnx/onnx:onnx_lib", + "//foxi:foxi_loader", + "//caffe2/caffe2/fb/onnxifi:fbonnxifi_loader_stub", + # "//rocksdb:rocksdb", + "//caffe2:cpuinfo", + "//xplat/QNNPACK:QNNPACK", + "//folly/experimental/symbolizer:symbolizer", + "//folly/hash:hash", + "//folly/io:iobuf", + "//folly:conv", + "//folly:dynamic", + "//folly:executor", + "//folly:format", + "//folly:json", + "//folly:map_util", + "//folly:memory", + "//folly:mpmc_queue", + "//folly:optional", + "//folly:random", + "//folly:range", + "//folly/synchronization:rw_spin_lock", + "//folly:singleton", + "//folly:string", + "//folly:synchronized", + "//folly:thread_local", + "//folly:traits", + "//caffe2:ATen-core-headers", + # important dependency to claim space for future refactorings + "//caffe2:ATen-cpu", + "//caffe2/caffe2/perfkernels:perfkernels", + "//xplat/third-party/FP16:FP16", + "fbsource//third-party/neon2sse:neon2sse", + ] + + exclude = [ + # hip files are obtained from defs_hip.bzl + # do not include in the cpu/cuda build + "**/hip/**/*", + "test/caffe2_gtest_main.cc", + "quantization/server/**/*", + "fb/async/comm/**/*", + "fb/monitoring/**/*", + "fb/session/**/*", + # utils/knobs.cc and utils/knob_patcher.cc are only used in the open-source build + # The internal build uses versions from fb/utils/ instead. 
+ "utils/knobs.cc", + "utils/knob_patcher.cc", + ] + + core_file_patterns = [ + "core/allocator.cc", + "core/logging.cc", + "core/flags.cc", + "core/common.cc", + "core/context.cc", + "core/event.cc", + "core/context_base.cc", + "core/numa.cc", + "core/blob_serialization.cc", + "core/tensor.cc", + "core/types.cc", + "core/blob_stats.cc", + "opt/converter.cc", + "opt/annotations.cc", + "utils/cpuid.cc", + "utils/threadpool/ThreadPool.cc", + "utils/threadpool/pthreadpool-cpp.cc", + "utils/threadpool/thread_pool_guard.cpp", + "utils/proto_utils.cc", + ] + + test_file_patterns = get_sgx_patterns([ + "_test.cc", + "_test.cpp", + ]) + + gpu_file_patterns = get_sgx_patterns([ + "_gpu.cc", + "_cudnn.cc", + ]) + + cpu_file_patterns = get_sgx_patterns([ + ".cc", + ".cpp", + ]) + + cpp_srcs = native.glob( + cpu_file_patterns, + exclude = exclude + gpu_file_patterns + test_file_patterns + core_file_patterns, + ) + + pp_flags = [ + "-Icaffe2", + "-Imodules", + "-DEIGEN_NO_DEBUG", + "-DCAFFE2_USE_GOOGLE_GLOG", + "-DCAFFE2_NO_CROSS_ARCH_WARNING", + "-DCAFFE2_USE_EXCEPTION_PTR", + # Work-around for incompatible thread pools in Caffe2 and NNPACK + "-DFBCODE_CAFFE2", + "-DUSE_PTHREADPOOL", + "-DC10_MOBILE", + ] + + compiler_flags = [ + "-Wno-unknown-pragmas", + "-Wno-narrowing", + "-Wno-missing-braces", + "-Wno-strict-overflow", + "-mno-avx", + "-Wno-error=unused-result", + ] + + cpu_header_patterns = [ + "**/*.h", + ] + + cpp_headers = native.glob( + cpu_header_patterns, + exclude = exclude, + ) + + cpp_library( + name = "caffe2_sgx_headers", + headers = cpp_headers, + propagated_pp_flags = pp_flags, + exported_deps = core_internal_deps + [ + "//folly/io/async:async_base", + "//caffe2/aten:ATen-core-sgx-headers", + ], + exported_external_deps = core_external_deps, + ) + + cpp_library( + name = "caffe2_sgx_core", + srcs = core_srcs + [ + "serialize/inline_container.cc", + "serialize/crc.cc", + "serialize/file_adapter.cc", + "serialize/istream_adapter.cc", + "serialize/read_adapter_interface.cc", + ], + compiler_flags = compiler_flags, + link_whole = True, + propagated_pp_flags = pp_flags, + exported_deps = core_internal_deps + [ + "//caffe2/aten:ATen-sgx-core", + "//caffe2/caffe2/core/nomnigraph:nomnigraph", + "//xplat/third-party/pthreadpool:pthreadpool", + "//caffe2:miniz", + ], + exported_external_deps = core_external_deps, + ) + +def add_sgx_perf_kernel_libs(): + # we do not need to define these targets if we are in not SGX mode + if not is_sgx: + return + + dependencies = [ + "//caffe2/caffe2:caffe2_sgx_headers", + "//caffe2/aten:ATen-core-sgx-headers", + ] + + compiler_common_flags = [ + "-DCAFFE2_PERF_WITH_AVX2", + "-DCAFFE2_PERF_WITH_AVX", + ] + + external_deps = [] + + # these are esentially disabled for hte sgx build but we still need them + # to avoid linking issues + levels_and_flags = ([ + ( + "avx2", + [ + "-mavx2", + "-mfma", + "-mavx", + "-mf16c", + ], + ), + ( + "avx", + [ + "-mavx", + "-mf16c", + ], + ), + ]) + + define_perf_kernels( + prefix = "sgx_", + levels_and_flags = levels_and_flags, + compiler_common_flags = compiler_common_flags, + dependencies = dependencies, + external_deps = external_deps, + ) diff --git a/tools/sgx_target_definitions.bzl b/tools/sgx_target_definitions.bzl new file mode 100644 index 000000000000..2cb816e1cc9b --- /dev/null +++ b/tools/sgx_target_definitions.bzl @@ -0,0 +1,96 @@ +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("@fbsource//tools/build_defs:buckconfig.bzl", "read_bool") +load( + "//caffe2:build_variables.bzl", + 
"core_sources_common", + "core_sources_full_mobile", + "core_trainer_sources", + "libtorch_extra_sources", + "libtorch_generated_sources", +) + +is_sgx = read_bool("fbcode", "sgx_mode", False) + +def libtorch_sgx_sources(gencode_pattern = ":generate-code[{}]"): + libtorch_core_mobile_sources = sorted(core_sources_common + core_sources_full_mobile + core_trainer_sources) + + sgx_sources_to_exclude = [ + "torch/csrc/jit/tensorexpr/llvm_codegen.cpp", + "torch/csrc/jit/tensorexpr/llvm_jit.cpp", + "torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp", + ] + + return libtorch_generated_sources(gencode_pattern) + [i for i in libtorch_core_mobile_sources if i not in sgx_sources_to_exclude] + [i for i in libtorch_extra_sources if i not in sgx_sources_to_exclude] + +def add_sgx_torch_libs(): + # we do not need to define these targets if we are in not SGX mode + if not is_sgx: + return + + compiler_flags_cpu = [ + "-DNO_CUDNN_DESTROY_HANDLE", + "-DPYTORCH_ONNX_CAFFE2_BUNDLE", + "-DTORCH_ENABLE_LLVM", + "-Wno-write-strings", + "-Wno-format", + "-Wno-strict-aliasing", + "-Wno-non-virtual-dtor", + "-Wno-shadow-compatible-local", + "-Wno-empty-body", + "-DUSE_XNNPACK", + ] + + propagated_pp_flags_cpu = [ + "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE", + "-DC10_MOBILE", + ] + + include_directories = [ + "..", + ".", + "torch/csrc/api/include", + "torch/csrc", + "torch/csrc/nn", + "torch/lib", + ] + + common_flags = { + "compiler_specific_flags": { + "clang": [ + "-Wno-absolute-value", + "-Wno-expansion-to-defined", + "-Wno-pessimizing-move", + "-Wno-return-type-c-linkage", + "-Wno-unknown-pragmas", + ], + }, + "headers": native.glob(["torch/csrc/**/*.h", "torch/csrc/generic/*.cpp", "test/cpp/jit/*.h", "test/cpp/tensorexpr/*.h"]), + } + + _libtorch_sgx_sources = list(libtorch_sgx_sources()) + + cpp_library( + name = "libtorch-sgx", + srcs = _libtorch_sgx_sources + [ + "fb/supported_mobile_models/SupportedMobileModels.cpp", + "torch/csrc/jit/mobile/function.cpp", + "torch/csrc/jit/mobile/import.cpp", + "torch/csrc/jit/mobile/interpreter.cpp", + "torch/csrc/jit/mobile/module.cpp", # this is only needed to load the model from caffe2/test/cpp/lite_interpreter_runtime/delegate_test.ptl + ], + link_whole = True, + include_directories = include_directories, + propagated_pp_flags = propagated_pp_flags_cpu, + exported_deps = [ + ":generated-autograd-headers", + ":generated-version-header", + "//caffe2/aten:ATen-sgx-cpu", + "//caffe2/caffe2:caffe2_sgx_core", + "//onnx/onnx:onnx_lib", + ], + exported_external_deps = [ + ("protobuf", None), + ], + compiler_flags = compiler_flags_cpu, + **common_flags + ) diff --git a/tools/target_definitions.bzl b/tools/target_definitions.bzl new file mode 100644 index 000000000000..66b2659050f3 --- /dev/null +++ b/tools/target_definitions.bzl @@ -0,0 +1,568 @@ +# @lint-ignore-every BUCKLINT supress the warning for using native +load("@bazel_skylib//lib:paths.bzl", "paths") +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("@fbcode_macros//build_defs:cpp_python_extension.bzl", "cpp_python_extension") +load("@fbcode_macros//build_defs:custom_rule.bzl", "custom_rule") +load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary") +load("@fbsource//tools/build_defs:glob_defs.bzl", "glob") +load( + "//caffe2:build_variables.bzl", + "glob_libtorch_python_sources", + "libtorch_cuda_sources", + "libtorch_nvfuser_generated_headers", + "libtorch_nvfuser_runtime_sources", + "libtorch_python_cuda_sources", + "libtorch_sources", + "torch_cpp_srcs", +) +load( + 
"//caffe2:defs_hip.bzl", + "get_hip_flags", + "hip_external_deps", + "hip_pp_flags", +) +load("//caffe2/caffe2/fb:defs_gpu.bzl", "gpu_library_selector", "gpu_library_targets", "is_amd_build") +load("//tools/build/buck:nccl_deps.bzl", "get_nccl_dependency") + +def _path_to_filename(fname): + return paths.split_extension(paths.basename(fname))[0] + +def use_kineto(): + return native.host_info().os.is_linux and native.host_info().arch.is_x86_64 and not is_amd_build() + +def add_torch_libs(): + r = {} + + torch_cpp_headers = glob(["torch/csrc/api/include/**/*.h"]) + ["torch/script.h"] + libtorch_python_sources = glob_libtorch_python_sources() + + use_mpi = native.read_config("fbcode", "caffe2_use_mpi", None) + enable_flatbuffer = bool(native.read_config("fbcode", "caffe2_enable_flatbuffer", None)) + + compiler_flags_cpu = [ + "-DUSE_C10D", + "-DUSE_NUMPY", + "-DUSE_SCALARS", + "-DNO_CUDNN_DESTROY_HANDLE", + "-DBUILD_CAFFE2", + "-DTORCH_ENABLE_LLVM", + "-Wno-write-strings", + "-Wno-format", + "-Wno-strict-aliasing", + "-Wno-non-virtual-dtor", + "-Wno-shadow-compatible-local", + "-Wno-empty-body", + ] + ([] if native.host_info().os.is_windows else [ + # XNNPACK depends on an updated version of pthreadpool interface, whose implementation + # includes - a header not available on Windows. + "-DUSE_XNNPACK", + ]) + + # We should really include preprocessor flags here + # instead of compiler_flags + propagated_pp_flags_cpu = [ + "-DSYMBOLICATE_MOBILE_DEBUG_HANDLE", + "-DUSE_DISTRIBUTED", + "-DUSE_C10D_GLOO", + "-DUSE_RPC", + "-DUSE_TENSORPIPE", + ] + ( + ["-DUSE_C10D_MPI"] if use_mpi else [] + ) + ( + ["-DUSE_KINETO", "-DUSE_KINETO_UPDATED"] if use_kineto() else [] + ) + ( + ["-DENABLE_LIBKINETO_CLIENT"] if native.read_config("kineto", "enable_libkineto_client", "1") == "1" else [] + ) + + compiler_flags_cuda = [ + "-DUSE_CUDNN", + "-DUSE_NCCL", + ] + + compiler_flags_hip = [] + + propagated_pp_flags_cuda = [ + "-DUSE_CUDA", + "-DUSE_C10D_NCCL", + ] + + common_headers = glob([ + "torch/csrc/**/*.h", + # c10d used to be a separate library whose includes ended in .hpp. + "torch/csrc/distributed/c10d/*.hpp", + "torch/csrc/generic/*.cpp", + ]) + [ + "torch/csrc/deploy/Exception.h", + "torch/csrc/deploy/deploy.h", + "torch/csrc/deploy/elf_file.h", + "torch/csrc/deploy/environment.h", + "torch/csrc/deploy/interpreter/builtin_registry.h", + "torch/csrc/deploy/interpreter/interpreter_impl.h", + "torch/csrc/deploy/loader.h", + "torch/csrc/deploy/mem_file.h", + "torch/csrc/deploy/noop_environment.h", + "torch/csrc/deploy/path_environment.h", + "torch/csrc/deploy/unity/tests/test_unity.h", + "torch/csrc/deploy/unity/xar_environment.h", + "torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h", + "test/cpp/jit/test_custom_class_registrations.h", + "test/cpp/jit/test_utils.h", + "test/cpp/tensorexpr/gtest_assert_float_eq.h", + "test/cpp/tensorexpr/padded_buffer.h", + "test/cpp/tensorexpr/test_base.h", + "test/cpp/tensorexpr/test_utils.h", + ] + common_headers.remove("torch/csrc/jit/serialization/mobile_bytecode_generated.h") + + common_flags = { + "compiler_specific_flags": { + "clang": [ + "-Wno-absolute-value", + "-Wno-expansion-to-defined", + "-Wno-pessimizing-move", + "-Wno-return-type-c-linkage", + "-Wno-unknown-pragmas", + ], + }, + "headers": common_headers, + } + + include_directories = [ + "..", + ".", + "torch/csrc/api/include", + "torch/csrc", + # c10d used to be a separate library and its includes were c10d/Foo.hpp, + # hence we now need this hack to keep supporting them. 
+ "torch/csrc/distributed", + "torch/csrc/nn", + ] + + _libtorch_sources = list(libtorch_sources()) + + # Add the Gloo and TensorPipe backends specific to Facebook networking. + _libtorch_sources.append("torch/csrc/distributed/c10d/fb/GlooDeviceFactory.cpp") + _libtorch_sources.append("torch/csrc/distributed/rpc/fb/tensorpipe_agent.cpp") + + cpp_library( + name = "libtorch", + srcs = _libtorch_sources + ([ + "torch/csrc/jit/serialization/flatbuffer_serializer.cpp", + "torch/csrc/jit/serialization/flatbuffer_serializer_jit.cpp", + "torch/csrc/jit/mobile/flatbuffer_loader.cpp", + ] if enable_flatbuffer else []), + link_whole = True, + include_directories = include_directories, + propagated_pp_flags = propagated_pp_flags_cpu + (["-DENABLE_FLATBUFFER"] if enable_flatbuffer else []), + exported_deps = ( + [ + ":ATen-cpu", + ":generated-autograd-headers", + ":generated-lazy-headers", + "//caffe2:version_cpp", + "//caffe2/caffe2:caffe2_cpu", + "//caffe2/caffe2/quantization/server:dnnlowp_ops", + "//caffe2/caffe2/serialize:inline_container", + "//caffe2/torch/lib/libshm:libshm", + "//gloo:gloo", + "//gloo/fb/transport/tls:tls", + "//gloo/transport/tcp:tcp", + "//tensorpipe:tensorpipe_cpu", + ] + (["//kineto/libkineto:kineto"] if use_kineto() else []) + + (["//caffe2:mobile_bytecode"] if enable_flatbuffer else []) + ), + exported_external_deps = [ + ("nanopb", None, "protobuf-nanopb"), + ("protobuf", None), + ("llvm-fb", None, "LLVMAnalysis"), + ("llvm-fb", None, "LLVMBPFAsmParser"), + ("llvm-fb", None, "LLVMBPFCodeGen"), + ("llvm-fb", None, "LLVMCodeGen"), + ("llvm-fb", None, "LLVMCore"), + ("llvm-fb", None, "LLVMExecutionEngine"), + ("llvm-fb", None, "LLVMIRReader"), + ("llvm-fb", None, "LLVMInstCombine"), + ("llvm-fb", None, "LLVMInterpreter"), + ("llvm-fb", None, "LLVMMC"), + ("llvm-fb", None, "LLVMNVPTXCodeGen"), + ("llvm-fb", None, "LLVMOrcJIT"), + ("llvm-fb", None, "LLVMRISCVAsmParser"), + ("llvm-fb", None, "LLVMRISCVCodeGen"), + ("llvm-fb", None, "LLVMScalarOpts"), + ("llvm-fb", None, "LLVMSupport"), + ("llvm-fb", None, "LLVMTarget"), + ("llvm-fb", None, "LLVMTransformUtils"), + ("llvm-fb", None, "LLVMVectorize"), + ("llvm-fb", None, "LLVMWebAssemblyAsmParser"), + ("llvm-fb", None, "LLVMWebAssemblyCodeGen"), + ("llvm-fb", None, "LLVMWebAssemblyInfo"), + ("llvm-fb", None, "LLVMX86AsmParser"), + ("llvm-fb", None, "LLVMX86CodeGen"), + ("llvm-fb", None, "LLVMipo"), + ] + ([("openmpi", None, "openmpi")] if use_mpi else []), + compiler_flags = compiler_flags_cpu, + **common_flags + ) + + # Below rules are used to stringify NVfuser runtime library into a header files + python_binary( + name = "nvfuser-stringify", + srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"], + base_module = "", + main_module = "torch.csrc.jit.codegen.cuda.tools.stringify_file", + ) + + # files in libtorch_nvfuser_runtime_sources that are violating package boundaries + # are mapped to their corresponding export_file rules. 
+ violation_paths_to_rule = { + "aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh": ":aten/src/ATen/cuda/detail/PhiloxCudaStateRaw.cuh", + "aten/src/ATen/cuda/detail/UnpackRaw.cuh": ":aten/src/ATen/cuda/detail/UnpackRaw.cuh", + } + + for name in libtorch_nvfuser_runtime_sources: + src_path = violation_paths_to_rule.get(name, name) + filename = _path_to_filename(src_path) + native.genrule( + name = "gen-nvfuser-hdr={}.h".format(filename), + srcs = {name: src_path}, + bash = "$(exe :nvfuser-stringify) -i $SRCDIR/{} -o $OUT".format(name), + out = "{}.h".format(filename), + ) + cpp_library( + name = "generated-nvfuser-headers", + headers = [":gen-nvfuser-hdr=" + x for x in libtorch_nvfuser_generated_headers], + header_namespace = "nvfuser_resources", + ) + + _libtorch_cuda_sources = list(libtorch_cuda_sources) + cpp_library( + name = "libtorch_cuda", + srcs = _libtorch_cuda_sources, + link_whole = True, + include_directories = include_directories, + # TODO: putting USE_CUDA in propagated_pp_flags is error-prone + propagated_pp_flags = propagated_pp_flags_cuda, + exported_deps = [ + ":ATen", + ":generated-aten-headers-cuda", + ":generated-autograd-headers", + ":generated-nvfuser-headers", + ":libtorch", + "//caffe2/caffe2:caffe2_cpu", + "//caffe2/caffe2:caffe2_gpu", + "//caffe2/torch/lib/libshm:libshm", + "//gloo:gloo_gpu_cuda", + "//tensorpipe:tensorpipe_cuda", + ], + exported_external_deps = [ + ("cudnn", None, "cudnn-lazy"), + ("cuda", None, "nvToolsExt-lazy"), + ("cuda", None, "nvrtc-lazy"), + ("cuda", None, "nvrtc-builtins-lazy"), + ] + get_nccl_dependency(), + compiler_flags = compiler_flags_cpu + compiler_flags_cuda, + **common_flags + ) + + # (original_paths, hipified_paths) + libtorch_hip_headers_filter = torch_cpp_headers + [h for h in common_headers if any([h.startswith(d) for d in [ + # headers in the following directories are added to libtorch_hip_headers_filter + # so that they are not hipified. 
+ "torch/csrc/deploy/", + "torch/csrc/distributed/rpc/metrics/", + "torch/csrc/jit/serialization/", + "torch/cpp/jit/", + "torch/cpp/tensorexpr/", + ]])] + libtorch_hip_sources = (libtorch_cuda_sources, [f.replace(".cu", ".hip") for f in libtorch_cuda_sources]) + libtorch_hip_headers = ([f for f in common_headers if f not in libtorch_hip_headers_filter],) * 2 + + custom_rule( + name = "fb_libtorch_hipify_gen", + srcs = libtorch_hip_sources[0] + libtorch_hip_headers[0], + build_args = "--source-dir= --hipify-dir= --copy-dir= --rewrite-cu-ext", + build_script_dep = "//caffe2:fb_caffe2_hipify", + output_gen_files = libtorch_hip_sources[1] + libtorch_hip_headers[1], + ) + + cpp_library( + name = "libtorch_hip_headers", + headers = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_headers[1]], + header_namespace = "", + ) + + cpp_library( + name = "libtorch_hip", + srcs = [":fb_libtorch_hipify_gen={}".format(f) for f in libtorch_hip_sources[1]], + headers = [f for f in common_headers if f in libtorch_hip_headers_filter], + link_whole = True, + propagated_pp_flags = hip_pp_flags, + exported_deps = [ + ":generated-aten-headers-hip", + ":generated-autograd-headers", + ":generated-nvfuser-headers", + ":libtorch", + ":libtorch_hip_headers", + "//caffe2:ATen-hip", + "//caffe2/caffe2:caffe2_cpu", + "//caffe2/caffe2:caffe2_gpu_hip", + "//caffe2/torch/lib/libshm:libshm", + "//gloo:gloo_gpu_hip", + "//tensorpipe:tensorpipe_cpu", # TODO: include a HIP version once it's developed + ], + exported_external_deps = hip_external_deps, + compiler_flags = compiler_flags_cpu + compiler_flags_hip + [ + "-Wno-unused-result", + ], + hip_flags = ["-Wno-unused-result"] + get_hip_flags(), + compiler_specific_flags = common_flags["compiler_specific_flags"], + ) + + gpu_library_targets( + name = "libtorch_gpu", + deps_cpu = [ + ":libtorch", + ], + deps_cuda = [ + ":libtorch_cuda", + ], + deps_hip = [ + ":libtorch_hip", + ], + exclude_hip_target = False, + extra_external_deps = [], + ) + + # torch-cpp is still conditionally compiled based on USE_CUDA. Ideally we'd + # separate it out as an additive library instead. + gpu_library_selector( + name = "torch-cpp", + deps_cpu = [":torch-cpp-cpu"], + deps_cuda = [":torch-cpp-cuda"], + deps_hip = [":torch-cpp-hip"], + merge_cpu_deps = False, + exclude_hip_target = False, + ) + + # USE_CUDA flag is propagated through propagated_pp_flags on libtorch + cpp_library( + name = "torch-cpp-cuda", + srcs = torch_cpp_srcs, + headers = torch_cpp_headers, + include_directories = [ + ".", + "torch/csrc/api/include/", + ], + exported_deps = [ + ":libtorch_cuda", + "//caffe2/torch/fb/init:init", + ], + exported_external_deps = [ + ("cuda", None, "cuda-lazy"), + ("cudnn", None, "cudnn-lazy"), + ], + ) + + cpp_library( + name = "torch-cpp-hip", + srcs = torch_cpp_srcs, + headers = torch_cpp_headers, + include_directories = [ + ".", + "torch/csrc/api/include/", + ], + exported_deps = [ + ":libtorch_hip", + "//caffe2/torch/fb/init:init", + ], + exported_external_deps = hip_external_deps, + ) + + cpp_library( + name = "torch-cpp-cpu", + srcs = torch_cpp_srcs, + headers = torch_cpp_headers, + include_directories = [ + ".", + "torch/csrc/api/include/", + ], + exported_deps = [ + ":libtorch", + "//caffe2/torch/fb/init:init", + ], + ) + + # _C_impl is still conditionally compiled based on USE_CUDA. Ideally we'd + # separate it out as an additive library instead. 
+ # TODO: split it into cpp and cuda parts similarly to libtorch + gpu_library_selector( + name = "_C_impl", + deps_cpu = [":_C_impl_cpu"], + deps_cuda = [":_C_impl_cuda"], + deps_hip = [":_C_impl_hip"], + merge_cpu_deps = False, + exclude_hip_target = False, + ) + + cpp_library( + name = "_C_impl_cpu", + srcs = libtorch_python_sources, + link_whole = True, + exported_deps = [ + "fbsource//third-party/fmt:fmt", + ":torch-cpp-cpu", + "//caffe2/torch/fb/init:init", + "//caffe2/torch/lib/libshm:libshm", + ], + exported_external_deps = [ + ("numpy", None, "cpp"), + ("pybind11", None), + ("python", None), + ], + compiler_flags = compiler_flags_cpu, + compiler_specific_flags = common_flags["compiler_specific_flags"], + ) + + # This target is used to help get headers for compile-time deps for torch::deploy + # libinterpreter.so build _without_ getting link-time deps, which are supplied + # separately by the application that dlopens libinterpreter.so. + # + # We make use of the buck auto-generated #headers flavor of a target to accomplish this. + # + # However, since #headers flavor of target with srcs can't be used in all build modes, we + # work around this limitation by using this 'pass-through' target, which has a usable + # #headers flavor in all build modes. + cpp_library( + name = "headers_for_torch_python_deps", + exported_deps = [ + ":_C_impl_cpu", + ], + ) + cpp_library( + name = "headers_for_torch_python_cuda_deps", + exported_deps = [ + ":_C_impl_cuda", + ], + ) + + # This target compiles torch_python bindings, but skips the deps on actual + # torch and python since those will be integrated specially in the wrapper for + # libinterpreter.so used in torch::deploy + cpp_library( + name = "torch_python_without_torch", + srcs = libtorch_python_sources + torch_cpp_srcs, + undefined_symbols = True, + preferred_linkage = "static", + exported_deps = [ + ":headers_for_torch_python_deps#headers", + ], + exported_external_deps = [ + ("pybind11", None), + ("frozenpython", None, "python-headers"), + ], + compiler_flags = compiler_flags_cpu + [ + # some code in the Python bindings compiles differently + # when you are deploy + "-DUSE_DEPLOY", + ], + compiler_specific_flags = common_flags["compiler_specific_flags"], + ) + + cpp_library( + name = "torch_python_cuda_without_torch", + srcs = libtorch_python_sources + torch_cpp_srcs + libtorch_python_cuda_sources, + undefined_symbols = True, + preferred_linkage = "static", + exported_deps = [ + ":headers_for_torch_python_cuda_deps#headers", + ], + exported_external_deps = [ + ("pybind11", None), + ("frozenpython", None, "python-headers"), + ], + compiler_flags = compiler_flags_cpu + [ + "-DUSE_CUDA", + # some code in the Python bindings compiles differently + # when you are deploy + "-DUSE_DEPLOY", + ], + compiler_specific_flags = common_flags["compiler_specific_flags"], + ) + + cpp_library( + name = "_C_impl_cuda", + srcs = libtorch_python_sources + libtorch_python_cuda_sources, + link_whole = True, + exported_deps = [ + "fbsource//third-party/fmt:fmt", + ":torch-cpp-cuda", + "//caffe2/torch/fb/init:init", + "//caffe2/torch/lib/libshm:libshm", + ], + exported_external_deps = [ + ("numpy", None, "cpp"), + ("pybind11", None), + ("python", None), + ], + compiler_flags = compiler_flags_cpu + compiler_flags_cuda, + compiler_specific_flags = common_flags["compiler_specific_flags"], + ) + + # Autogenerated files whose rules contain ":" are not hipified. 
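+    # i.e. libtorch_python_hip_sources keeps the ":"-referenced generated sources as-is,
+    # while libtorch_python_hip_sources_hipified is the set run through fb_C_impl_hipify_gen.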
+ libtorch_python_hip_sources = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if ":" in f] + libtorch_python_hip_sources_hipified = [f for f in (libtorch_python_sources + libtorch_python_cuda_sources) if not ":" in f] + + custom_rule( + name = "fb_C_impl_hipify_gen", + srcs = libtorch_python_hip_sources_hipified, + build_args = "--source-dir= --hipify-dir= --copy-dir=", + build_script_dep = "//caffe2:fb_caffe2_hipify", + output_gen_files = libtorch_python_hip_sources_hipified, + ) + + cpp_library( + name = "_C_impl_hip", + srcs = [":fb_C_impl_hipify_gen={}".format(f) for f in (libtorch_python_hip_sources_hipified)] + libtorch_python_hip_sources, + link_whole = True, + exported_deps = [ + "fbsource//third-party/fmt:fmt", + ":torch-cpp-hip", + "//caffe2/torch/fb/init:init", + "//caffe2/torch/lib/libshm:libshm", + ], + exported_external_deps = [ + ("numpy", None, "cpp"), + ("pybind11", None), + ("python", None), + ], + compiler_flags = compiler_flags_cpu + compiler_flags_hip + ["-Wno-unused-result"], + compiler_specific_flags = common_flags["compiler_specific_flags"], + ) + + cpp_python_extension( + name = "_C", + srcs = [ + "torch/csrc/stub.c", + ], + base_module = "torch", + deps = [ + ":_C_impl", + "//caffe2:flatbuffer_loader", + ], + ) + + cpp_python_extension( + name = "_C_flatbuffer", + srcs = [ + "torch/csrc/stub_with_flatbuffer.c", + "torch/csrc/init_flatbuffer_module.cpp", + ], + base_module = "torch", + deps = [ + ":_C_impl", + "//caffe2:flatbuffer_loader", + "//caffe2:flatbuffer_serializer", + ], + ) + + return r diff --git a/torch/csrc/deploy/interpreter/defs.bzl b/torch/csrc/deploy/interpreter/defs.bzl new file mode 100644 index 000000000000..719155cf7da0 --- /dev/null +++ b/torch/csrc/deploy/interpreter/defs.bzl @@ -0,0 +1,117 @@ +load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary") +load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library") +load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule") + +# @lint-ignore-every BUCKLINT +load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native") + +def embedded_interpreter(name, suffix, legacy = False, exported_deps = [], exported_external_deps = []): + final_name = name + is_all = suffix == "all" + is_cuda = suffix == "cuda" or is_all + platform_static_lib = [] + for platform in ["platform009", "platform010"]: + name = platform + "_" + final_name + so_name = name + ".so" + cpp_binary( + name = so_name, + srcs = [ + "interpreter_impl.cpp", + ] + (["import_find_sharedfuncptr.cpp"] if is_all else []), + headers = [ + "Optional.hpp", + "interpreter_impl.h", + ], + header_namespace = "torch/csrc/deploy", + dlopen_enabled = True, + linker_flags = ([ + # This ensures only the intended interface symbols are public/global + # the rest are hidden, regardless of how they were compiled + # (e.g. fvisibility=hidden is NOT important for the component + # objs in this library, since we override here.) 
+                "--version-script=$(location :hide_symbols.script)",
+            ] if not is_all else []),
+            deps = [
+                "fbsource//third-party/fmt:fmt",
+            ] + ([
+                ":builtin_registry_cuda",
+                "//caffe2:torch_python_cuda_without_torch",
+                "//deeplearning/trt/python:frozen_tensorrt",
+            ] if is_cuda else [
+                ":builtin_registry",
+                "//caffe2:torch_python_without_torch",
+            ]),
+            external_deps =
+                [
+                    # needed for interpreter.cpp itself; it currently uses pybind
+                    ("frozenpython", None, "python-frozen"),
+                    ("frozenpython", None, "python"),
+                ],
+            fbcode_platform = platform,
+        )
+
+        # We build torch::deploy with two embedded binaries: one with only cpu py
+        # bindings, the other with cpu+cuda py bindings. This unfortunately wastes
+        # some binary size, but at least only one of them is loaded at runtime.
+        #
+        # This is because of two reasons:
+        # (1) applications such as predictor want to depend on torch::deploy in a
+        # cuda-agnostic way, i.e. they don't choose cpu vs. cuda themselves; a
+        # binary/app that depends on predictor chooses whether or not to include a
+        # dep on cuda.
+        #
+        # (2) because of the way the embedded binary is created and loaded, it only
+        # exposes a small set of interface symbols globally, for creating a new
+        # interpreter, and hides its other symbols (esp. the python ones) so they
+        # don't conflict with other interpreters. This prevents dividing the cpu and
+        # cuda portions of the bindings into _separate_ libs and loading the cuda
+        # part additively. Hence, to achieve requirement (1), we bundle two complete
+        # interpreter libs, one with and one without cuda.
+
+        cp_cmd = "$(location //caffe2/torch/csrc/deploy:remove_dt_needed)" if suffix == "all" else "cp"
+
+        build_name = "build_" + name
+        if not legacy:
+            cxx_genrule(
+                name = build_name,
+                out = "embedded_interpreter_" + suffix + ".a",
+                cmd = """\
+                """ + cp_cmd + """ $(location :""" + so_name + """) libtorch_deployinterpreter_internal_""" + suffix + """.so
+                ld -r -b binary -o ${TMP}/embedded_interpreter_""" + suffix + """.o libtorch_deployinterpreter_internal_""" + suffix + """.so
+                objcopy --rename-section .data=.torch_deploy_payload.interpreter_""" + suffix + """,readonly,contents -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_start -N _binary_libtorch_deployinterpreter_""" + suffix + """_so_end ${TMP}/embedded_interpreter_""" + suffix + """.o
+                ar rcs ${OUT} ${TMP}/embedded_interpreter_""" + suffix + """.o
+                """,
+            )
+        else:
+            cxx_genrule(
+                name = build_name,
+                out = "embedded_interpreter_cuda_legacy.a",
+                cmd = """\
+                cp $(location :""" + so_name + """) libtorch_deployinterpreter_cuda.so
+                ld -r -b binary -o ${TMP}/embedded_interpreter_cuda.o libtorch_deployinterpreter_cuda.so
+                ar rcs ${OUT} ${TMP}/embedded_interpreter_cuda.o
+                """,
+            )
+        platform_static_lib.append(["^" + platform, ":" + build_name])
+
+    internal_name = final_name + "_internal"
+    fb_native.prebuilt_cxx_library(
+        preferred_linkage = "static",
+        name = internal_name,
+        visibility = ["PUBLIC"],
+        link_whole = True,
+        platform_static_lib = platform_static_lib,
+    )
+
+    # A thin wrapper around :embedded_interpreter_internal that adds the
+    # --export-dynamic linker flag. The flag is propagated to the consuming
+    # cpp_binary, so we no longer require each cpp_binary to enable
+    # --export-dynamic explicitly; new use cases often forgot to do so, which
+    # caused "interpreter not found" crashes.
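+    # (In other words, a hypothetical consumer only needs something like
+    #     deps = ["//caffe2/torch/csrc/deploy/interpreter:<final_name>"],
+    # in its cpp_binary; no extra linker flags are required.)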
+    cpp_library(
+        name = final_name,
+        linker_flags = [
+            "--export-dynamic",
+        ],
+        exported_deps = [
+            ":" + internal_name,
+        ] + exported_deps,
+        exported_external_deps = exported_external_deps,
+    )
diff --git a/torch/csrc/deploy/unity/unity.bzl b/torch/csrc/deploy/unity/unity.bzl
new file mode 100644
index 000000000000..8431356a4df9
--- /dev/null
+++ b/torch/csrc/deploy/unity/unity.bzl
@@ -0,0 +1,46 @@
+load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
+load("@fbcode_macros//build_defs:native_rules.bzl", "cxx_genrule")
+load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary")
+
+# @lint-ignore-every BUCKLINT
+load("@fbsource//tools/build_defs:fb_native_wrapper.bzl", "fb_native")
+
+def build_unity(name, **kwargs):
+    python_binary(name = name, **kwargs)
+
+    cxx_genrule(
+        name = "{}_build_python_app_lib".format(name),
+        out = "python_app.a",
+        cmd = """\
+        cp $(location :""" + name + """) python_app
+        ld -r -b binary -o ${TMP}/python_app.o python_app
+        # Rename the .data section to .torch_deploy_payload.unity.
+        # Don't set the alloc/load flags for the section, so it is not mapped or
+        # relocated at load time.
+        # Also strip the _binary_python_app_start/end/size symbols to avoid
+        # confusion.
+        objcopy --rename-section .data=.torch_deploy_payload.unity,readonly,contents -N _binary_python_app_start -N _binary_python_app_end -N _binary_python_app_size ${TMP}/python_app.o
+        ar rcs ${OUT} ${TMP}/python_app.o
+        """,
+    )
+
+    fb_native.prebuilt_cxx_library(
+        name = "{}_python_app_lib".format(name),
+        visibility = ["PUBLIC"],
+        link_whole = True,
+        preferred_linkage = "static",
+        static_lib = ":{}_build_python_app_lib".format(name),
+    )
+
+    cpp_library(
+        name = "{}_unity_lib".format(name),
+        srcs = [
+        ],
+        linker_flags = [
+            "--export-dynamic",
+        ],
+        exported_deps = [
+            "//caffe2/torch/csrc/deploy/unity:unity_core",
+            ":{}_python_app_lib".format(name),
+        ],
+    )
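+
+# Purely illustrative usage (hypothetical target and module names; every other
+# kwarg is forwarded to python_binary unchanged):
+#
+#     build_unity(
+#         name = "example_app",
+#         main_module = "example.app.main",
+#         deps = ["//example/app:lib"],
+#     )
+#
+# This defines :example_app (the python_binary itself) plus :example_app_unity_lib,
+# which links the packaged app into the .torch_deploy_payload.unity section
+# alongside //caffe2/torch/csrc/deploy/unity:unity_core.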