Bazel build of pytorch with gating CI (#36011)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/36011 Differential Revision: D20873430 Pulled By: malfet fbshipit-source-id: 8ffffd10ca0ff8bdab578a70a9b2b777aed985d0
2025-10-20 21:14:14 +08:00 · 2020-04-06 22:48:33 -07:00
parent 64594d8333
commit 447bcd341d
37 changed files with 4519 additions and 56 deletions
--- a/.bazelrc
+++ b/.bazelrc
@ -0,0 +1,3 @@
+build --copt=--std=c++14
+build --copt=-I.
+build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
--- a/.bazelversion
+++ b/.bazelversion
@ -0,0 +1 @@
+2.2.0
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@ -187,9 +187,9 @@ def instantiate_configs():

    root = get_root()
    found_configs = conf_tree.dfs(root)
-    restrict_phases = None
    for fc in found_configs:

+        restrict_phases = None
        distro_name = fc.find_prop("distro_name")
        compiler_name = fc.find_prop("compiler_name")
        compiler_version = fc.find_prop("compiler_version")
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -1557,6 +1557,77 @@ jobs:
            cd ${PROJ_ROOT}/ios/TestApp
            instruments -s -devices
            fastlane scan
+  pytorch_linux_bazel_build:
+    <<: *pytorch_params
+    machine:
+      image: ubuntu-1604:201903-01
+    steps:
+    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+    - attach_scripts
+    - setup_linux_system_environment
+    - checkout
+    - setup_ci_environment
+    - run:
+        name: Bazel Build
+        no_output_timeout: "1h"
+        command: |
+          set -e
+          # Pull Docker image and run build
+          echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
+          time docker pull ${DOCKER_IMAGE} >/dev/null
+          export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
+
+          echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+
+          git submodule sync && git submodule update -q --init --recursive
+
+          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
+
+          export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          # Push intermediate Docker image for next phase to use
+          if [ -z "${BUILD_ONLY}" ]; then
+            # Augment our output image name with bazel to avoid collisions
+            output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1}
+            export COMMIT_DOCKER_IMAGE=$output_image
+            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
+            time docker push ${COMMIT_DOCKER_IMAGE}
+          fi
+
+  pytorch_linux_bazel_test:
+    <<: *pytorch_params
+    machine:
+      image: ubuntu-1604:201903-01
+    steps:
+    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+    - attach_scripts
+    - setup_linux_system_environment
+    - setup_ci_environment
+    - run:
+        name: Test
+        no_output_timeout: "90m"
+        command: |
+          set -e
+          output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1}
+          export COMMIT_DOCKER_IMAGE=$output_image
+          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
+
+          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
+
+          if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
+            export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          else
+            export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          fi
+
+          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
+            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          else
+            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          fi
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts

  promote_s3:
    <<: *promote_common
@ -2438,6 +2509,20 @@ workflows:
          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7:e43973a9-9d5a-4138-9181-a08a0fc55e2f"
          use_cuda_docker_runtime: "1"
          resource_class: gpu.medium
+      - pytorch_linux_bazel_build:
+          name: pytorch_bazel_build
+          requires:
+            - setup
+          build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-build"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
+          resource_class: large
+      - pytorch_linux_bazel_test:
+          name: pytorch_bazel_test
+          requires:
+            - setup
+            - pytorch_bazel_build
+          build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-test"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
      - caffe2_linux_build:
          name: caffe2_onnx_main_py3_6_clang7_ubuntu16_04_build
          requires:
--- a/.circleci/generate_config_yml.py
+++ b/.circleci/generate_config_yml.py
@ -101,6 +101,7 @@ YAML_SOURCES = [
    File("workflows-pytorch-ios-builds.yml"),
    File("workflows-pytorch-mobile-builds.yml"),
    File("workflows-pytorch-ge-config-tests.yml"),
+    File("workflows-pytorch-bazel-builds.yml"),
    Listgen(caffe2_build_definitions.get_workflow_jobs, 3),
    File("workflows-binary-builds-smoke-subset.yml"),
    Listgen(binary_build_definitions.get_binary_smoke_test_jobs, 3),
--- a/.circleci/verbatim-sources/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs-custom.yml
@ -440,3 +440,74 @@
            cd ${PROJ_ROOT}/ios/TestApp
            instruments -s -devices
            fastlane scan
+  pytorch_linux_bazel_build:
+    <<: *pytorch_params
+    machine:
+      image: ubuntu-1604:201903-01
+    steps:
+    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+    - attach_scripts
+    - setup_linux_system_environment
+    - checkout
+    - setup_ci_environment
+    - run:
+        name: Bazel Build
+        no_output_timeout: "1h"
+        command: |
+          set -e
+          # Pull Docker image and run build
+          echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
+          time docker pull ${DOCKER_IMAGE} >/dev/null
+          export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
+
+          echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+
+          git submodule sync && git submodule update -q --init --recursive
+
+          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
+
+          export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          # Push intermediate Docker image for next phase to use
+          if [ -z "${BUILD_ONLY}" ]; then
+            # Augment our output image name with bazel to avoid collisions
+            output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1}
+            export COMMIT_DOCKER_IMAGE=$output_image
+            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
+            time docker push ${COMMIT_DOCKER_IMAGE}
+          fi
+
+  pytorch_linux_bazel_test:
+    <<: *pytorch_params
+    machine:
+      image: ubuntu-1604:201903-01
+    steps:
+    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+    - attach_scripts
+    - setup_linux_system_environment
+    - setup_ci_environment
+    - run:
+        name: Test
+        no_output_timeout: "90m"
+        command: |
+          set -e
+          output_image=${DOCKER_IMAGE}-bazel-${CIRCLE_SHA1}
+          export COMMIT_DOCKER_IMAGE=$output_image
+          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
+
+          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
+
+          if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
+            export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          else
+            export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          fi
+
+          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
+            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          else
+            export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          fi
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
--- a/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml
+++ b/.circleci/verbatim-sources/workflows-pytorch-bazel-builds.yml
@ -0,0 +1,14 @@
+      - pytorch_linux_bazel_build:
+          name: pytorch_bazel_build
+          requires:
+            - setup
+          build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-build"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
+          resource_class: large
+      - pytorch_linux_bazel_test:
+          name: pytorch_bazel_test
+          requires:
+            - setup
+            - pytorch_bazel_build
+          build_environment: "pytorch-linux-xenial-py3.6-gcc7-bazel-test"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7:f990c76a-a798-42bb-852f-5be5006f8026"
--- a/.gitignore
+++ b/.gitignore
@ -255,3 +255,6 @@ TAGS

 # clangd background index
 .clangd/
+
+# bazel symlinks
+bazel-*
--- a/.jenkins/pytorch/build.sh
+++ b/.jenkins/pytorch/build.sh
@ -180,14 +180,20 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
  export CXX=clang++
 fi

+if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
+  set -e

-# check that setup.py would fail with bad arguments
-echo "The next three invocations are expected to fail with invalid command error messages."
-( ! get_exit_code python setup.py bad_argument )
-( ! get_exit_code python setup.py clean] )
-( ! get_exit_code python setup.py clean bad_argument )
+  get_bazel

-if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
+  tools/bazel build :torch
+else
+  # check that setup.py would fail with bad arguments
+  echo "The next three invocations are expected to fail with invalid command error messages."
+  ( ! get_exit_code python setup.py bad_argument )
+  ( ! get_exit_code python setup.py clean] )
+  ( ! get_exit_code python setup.py clean bad_argument )
+
+  if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then

    # ppc64le build fails when WERROR=1
    # set only when building other architectures
@ -219,7 +225,7 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
    make VERBOSE=1
    popd
    assert_git_not_dirty
-else
+  else
    # Test standalone c10 build
    if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda10.1-cudnn7-py3* ]]; then
      mkdir -p c10/build
@ -239,6 +245,7 @@ else
    pushd ../cpp-build/caffe2
    WERROR=1 VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY
    popd
+  fi
 fi

 # Test XLA build
--- a/.jenkins/pytorch/common.sh
+++ b/.jenkins/pytorch/common.sh
@ -187,3 +187,12 @@ function file_diff_from_base() {
  set -e
  git diff --name-only "$(git merge-base origin master HEAD)" > "$1"
 }
+
+# Download the pinned Bazel 2.2.0 release binary to tools/bazel, verify its
+# SHA-256 checksum and mark it executable.
+#
+# Returns non-zero if the download or the checksum verification fails. In that
+# case the downloaded file is removed, so an unverified or partial binary is
+# never left behind or made executable, even when the caller is not running
+# under `set -e`.
+function get_bazel() {
+  local bazel_url='https://github.com/bazelbuild/bazel/releases/download/2.2.0/bazel-2.2.0-linux-x86_64'
+  local bazel_sha256='b2f002ea0e6194a181af6ac84cd94bd8dc797722eb2354690bebac92dda233ff'
+
+  # download bazel version
+  if ! wget "${bazel_url}" -O tools/bazel; then
+    # wget -O creates the output file even when the transfer fails.
+    rm -f tools/bazel
+    return 1
+  fi
+  # verify content
+  if ! echo "${bazel_sha256} tools/bazel" | sha256sum --quiet -c; then
+    rm -f tools/bazel
+    return 1
+  fi
+
+  chmod +x tools/bazel
+}
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@ -42,7 +42,7 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
 fi

 # --user breaks ppc64le builds and these packages are already in ppc64le docker
-if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]] && [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
  # JIT C++ extensions require ninja.
  pip_install --user ninja
  # ninja is installed in /var/lib/jenkins/.local/bin
@ -252,7 +252,15 @@ test_backward_compatibility() {
  assert_git_not_dirty
 }

-if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* ]]; then
+# Fetch the pinned Bazel binary via get_bazel and run the :all_tests target,
+# skipping targets tagged gpu-required and passing the negative filter
+# `-*_CUDA` through --test_filter.
+test_bazel() {
+  set -e
+
+  get_bazel
+
+  # The filter is quoted so the shell passes `-*_CUDA` through literally
+  # instead of treating it as a pathname glob.
+  tools/bazel test --test_tag_filters=-gpu-required '--test_filter=-*_CUDA' :all_tests
+}
+
+if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
  (cd test && python -c "import torch; print(torch.__config__.show())")
  (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
 fi
@ -278,6 +286,8 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-test2 || "${JOB_BASE_NAME}" == *-test2 ]]; t
  test_aten
  test_libtorch
  test_custom_script_ops
+elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
+  test_bazel
 else
  test_torchvision
  test_python_nn
--- a/BUILD.bazel
+++ b/BUILD.bazel
--- a/WORKSPACE
+++ b/WORKSPACE
@ -0,0 +1,160 @@
+workspace(name = "pytorch")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("//tools/rules:workspace.bzl", "new_patched_local_repository")
+
+http_archive(
+    name = "bazel_skylib",
+    urls = [
+        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.0.2/bazel-skylib-1.0.2.tar.gz",
+    ],
+)
+
+http_archive(
+    name = "com_google_googletest",
+    strip_prefix = "googletest-cd6b9ae3243985d4dc725abd513a874ab4161f3e",
+    urls = [
+        "https://github.com/google/googletest/archive/cd6b9ae3243985d4dc725abd513a874ab4161f3e.tar.gz",
+    ],
+)
+
+# Bazel rules for pybind11, pinned to a specific commit archive and checksum.
+http_archive(
+    name = "pybind11_bazel",
+    sha256 = "e4a9536f49d4a88e3c5a09954de49c4a18d6b1632c457a62d6ec4878c27f1b5b",
+    strip_prefix = "pybind11_bazel-7f397b5d2cc2434bbd651e096548f7b40c128044",
+    urls = [
+        "https://github.com/pybind/pybind11_bazel/archive/7f397b5d2cc2434bbd651e096548f7b40c128044.zip",
+    ],
+)
+
+new_local_repository(
+    name = "pybind11",
+    build_file = "@pybind11_bazel//:pybind11.BUILD",
+    path = "third_party/pybind11",
+)
+
+http_archive(
+    name = "com_github_glog",
+    strip_prefix = "glog-0.4.0",
+    urls = [
+        "https://github.com/google/glog/archive/v0.4.0.tar.gz",
+    ],
+)
+
+http_archive(
+    name = "com_github_gflags_gflags",
+    strip_prefix = "gflags-2.2.2",
+    urls = [
+        "https://github.com/gflags/gflags/archive/v2.2.2.tar.gz",
+    ],
+    sha256 = "34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf",
+)
+
+new_local_repository(
+    name = "gloo",
+    build_file = "//third_party:gloo.BUILD",
+    path = "third_party/gloo",
+)
+
+new_local_repository(
+    name = "onnx",
+    build_file = "//third_party:onnx.BUILD",
+    path = "third_party/onnx",
+)
+
+new_local_repository(
+    name = "foxi",
+    build_file = "//third_party:foxi.BUILD",
+    path = "third_party/foxi",
+)
+
+local_repository(
+    name = "com_google_protobuf",
+    path = "third_party/protobuf",
+)
+
+new_local_repository(
+    name = "eigen",
+    build_file = "//third_party:eigen.BUILD",
+    path = "third_party/eigen",
+)
+
+new_local_repository(
+    name = "fbgemm",
+    build_file = "//third_party:fbgemm.BUILD",
+    path = "third_party/fbgemm",
+)
+
+new_local_repository(
+    name = "ideep",
+    build_file = "//third_party:ideep.BUILD",
+    path = "third_party/ideep",
+)
+
+new_local_repository(
+    name = "mkl_dnn",
+    build_file = "//third_party:mkl-dnn.BUILD",
+    path = "third_party/ideep/mkl-dnn",
+)
+
+new_local_repository(
+    name = "cpuinfo",
+    build_file = "//third_party:cpuinfo.BUILD",
+    path = "third_party/cpuinfo",
+)
+
+new_local_repository(
+    name = "asmjit",
+    build_file = "//third_party:asmjit.BUILD",
+    path = "third_party/fbgemm/third_party/asmjit",
+)
+
+new_local_repository(
+    name = "sleef",
+    build_file = "//third_party:sleef.BUILD",
+    path = "third_party/sleef",
+)
+
+new_patched_local_repository(
+    name = "tbb",
+    patches = [
+        "@//third_party:tbb.patch",
+    ],
+    patch_strip = 1,
+    build_file = "//third_party:tbb.BUILD",
+    path = "third_party/tbb",
+)
+
+http_archive(
+    name = "mkl",
+    build_file = "//third_party:mkl.BUILD",
+    strip_prefix = "lib",
+    sha256 = "59154b30dd74561e90d547f9a3af26c75b6f4546210888f09c9d4db8f4bf9d4c",
+    urls = [
+        "https://anaconda.org/anaconda/mkl/2020.0/download/linux-64/mkl-2020.0-166.tar.bz2",
+    ],
+)
+
+http_archive(
+    name = "mkl_headers",
+    build_file = "//third_party:mkl_headers.BUILD",
+    sha256 = "2af3494a4bebe5ddccfdc43bacc80fcd78d14c1954b81d2c8e3d73b55527af90",
+    urls = [
+        "https://anaconda.org/anaconda/mkl-include/2020.0/download/linux-64/mkl-include-2020.0-166.tar.bz2",
+    ],
+)
+
+http_archive(
+    name = "rules_python",
+    url = "https://github.com/bazelbuild/rules_python/releases/download/0.0.1/rules_python-0.0.1.tar.gz",
+    sha256 = "aa96a691d3a8177f3215b14b0edc9641787abaaa30363a080165d06ab65e1161",
+)
+
+load("@pybind11_bazel//:python_configure.bzl", "python_configure")
+python_configure(name = "local_config_python")
+
+load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
+
+protobuf_deps()
+
+load("@rules_python//python:repositories.bzl", "py_repositories")
+
+py_repositories()
--- a/aten.bzl
+++ b/aten.bzl
@ -0,0 +1,37 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+CPU_CAPABILITY_NAMES = ["DEFAULT", "AVX", "AVX2"]
+PREFIX = "aten/src/ATen/native/"
+
+def intern_build_aten_ops(copts, deps):
+    """Declares the per-CPU-capability ATen kernel libraries.
+
+    For every entry in CPU_CAPABILITY_NAMES, each kernel source matched under
+    PREFIX (cpu/*.cpp and quantized/cpu/kernels/*.cpp) is copied by a genrule
+    to "<source>.<CAPABILITY>.cpp" and compiled into the library
+    "ATen_CPU_<CAPABILITY>" with -DCPU_CAPABILITY=<CAPABILITY> and
+    -DCPU_CAPABILITY_<CAPABILITY> defined. An aggregate "ATen_CPU" library
+    depends on all per-capability libraries.
+
+    Args:
+        copts: compiler options applied to every per-capability library.
+        deps: dependencies of every per-capability library.
+    """
+
+    # The set of kernel sources does not depend on the capability, so glob once.
+    kernel_impls = native.glob(
+        [
+            PREFIX + "cpu/*.cpp",
+            PREFIX + "quantized/cpu/kernels/*.cpp",
+        ],
+    )
+
+    for cpu_capability in CPU_CAPABILITY_NAMES:
+        srcs = []
+        for impl in kernel_impls:
+            name = impl.replace(PREFIX, "")
+            out = PREFIX + name + "." + cpu_capability + ".cpp"
+
+            # Each capability needs its own copy of the source so the same
+            # file can be compiled once per set of capability defines.
+            native.genrule(
+                name = name + "_" + cpu_capability + "_cp",
+                srcs = [impl],
+                outs = [out],
+                cmd = "cp $< $@",
+            )
+            srcs.append(out)
+
+        cc_library(
+            name = "ATen_CPU_" + cpu_capability,
+            srcs = srcs,
+            copts = copts + [
+                "-DCPU_CAPABILITY=" + cpu_capability,
+                "-DCPU_CAPABILITY_" + cpu_capability,
+            ],
+            deps = deps,
+        )
+
+    cc_library(
+        name = "ATen_CPU",
+        # Libraries are aggregated through deps (not srcs) so that their
+        # transitive dependencies and headers propagate to users of ATen_CPU.
+        deps = [":ATen_CPU_" + capability for capability in CPU_CAPABILITY_NAMES],
+        linkstatic = 1,
+    )
--- a/third_party/BUILD
+++ b/third_party/BUILD
--- a/third_party/asmjit.BUILD
+++ b/third_party/asmjit.BUILD
@ -0,0 +1,28 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# Static library built from asmjit's core and x86 sources; headers are exposed
+# with "asmjit/" and "src/" on the include path.
+cc_library(
+    name = "asmjit",
+    srcs = glob([
+        "src/asmjit/core/*.cpp",
+        "src/asmjit/x86/*.cpp",
+    ]),
+    hdrs = glob([
+        "src/asmjit/x86/*.h",
+        "src/asmjit/core/*.h",
+        "src/asmjit/*.h",
+    ]),
+    copts = [
+        "-DASMJIT_STATIC",
+        "-fno-tree-vectorize",
+        "-fmerge-all-constants",
+        # Exactly one -std flag: when several are given the compiler honors
+        # only the last one, so listing more than one is misleading.
+        "-std=gnu++11",
+        "-DTH_BLAS_MKL",
+    ],
+    includes = [
+        "asmjit/",
+        "src/",
+    ],
+    linkstatic = True,
+    visibility = ["//visibility:public"],
+)
--- a/third_party/cpuinfo.BUILD
+++ b/third_party/cpuinfo.BUILD
@ -0,0 +1,56 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+    name = "clog",
+    srcs = [
+        "deps/clog/src/clog.c",
+    ],
+    hdrs = glob([
+        "deps/clog/include/*.h",
+    ]),
+    includes = [
+        "deps/clog/include/",
+    ],
+    linkstatic = True,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cpuinfo",
+    srcs = glob(
+        [
+            "src/*.c",
+            "src/linux/*.c",
+            "src/x86/*.c",
+            "src/x86/cache/*.c",
+            "src/x86/linux/*.c",
+        ],
+        exclude = [
+            "src/x86/mockcpuid.c",
+            "src/linux/mockfile.c",
+        ],
+    ),
+    hdrs = glob([
+        "include/*.h",
+        "src/*.h",
+        "src/cpuinfo/*.h",
+        "src/include/*.h",
+        "src/x86/*.h",
+        "src/x86/linux/*.h",
+        "src/linux/*.h",
+    ]),
+    copts = [
+        "-DCPUINFO_LOG_LEVEL=2",
+        "-DTH_BLAS_MKL",
+        "-D_GNU_SOURCE=1",
+    ],
+    includes = [
+        "include",
+        "src",
+    ],
+    linkstatic = True,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":clog",
+    ],
+)
--- a/third_party/eigen.BUILD
+++ b/third_party/eigen.BUILD
@ -0,0 +1,91 @@
+# This BUILD file is derived from https://github.com/tensorflow/tensorflow/blob/master/third_party/eigen.BUILD
+
+# Description:
+#   Eigen is a C++ template library for linear algebra: vectors,
+#   matrices, and related algorithms.
+
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+licenses([
+    # Note: Eigen is an MPL2 library that includes GPL v3 and LGPL v2.1+ code.
+    #       We've taken special care to not reference any restricted code.
+    "reciprocal",  # MPL2
+    "notice",  # Portions BSD
+])
+
+exports_files(["COPYING.MPL2"])
+
+# License-restricted (i.e. not reciprocal or notice) files inside Eigen/...
+EIGEN_RESTRICTED_FILES = [
+    "Eigen/src/OrderingMethods/Amd.h",
+    "Eigen/src/SparseCholesky/**",
+]
+
+# Notable transitive dependencies of restricted files inside Eigen/...
+EIGEN_RESTRICTED_DEPS = [
+    "Eigen/Eigen",
+    "Eigen/IterativeLinearSolvers",
+    "Eigen/MetisSupport",
+    "Eigen/Sparse",
+    "Eigen/SparseCholesky",
+    "Eigen/SparseLU",
+]
+
+EIGEN_FILES = [
+    "Eigen/**",
+    "unsupported/Eigen/CXX11/**",
+    "unsupported/Eigen/FFT",
+    "unsupported/Eigen/KroneckerProduct",
+    "unsupported/Eigen/src/FFT/**",
+    "unsupported/Eigen/src/KroneckerProduct/**",
+    "unsupported/Eigen/MatrixFunctions",
+    "unsupported/Eigen/SpecialFunctions",
+    "unsupported/Eigen/Splines",
+    "unsupported/Eigen/src/MatrixFunctions/**",
+    "unsupported/Eigen/src/SpecialFunctions/**",
+    "unsupported/Eigen/src/Splines/**",
+    "unsupported/Eigen/NonLinearOptimization",
+    "unsupported/Eigen/NumericalDiff",
+    "unsupported/Eigen/src/**",
+    "unsupported/Eigen/Polynomials",
+]
+
+# List of files picked up by glob but actually part of another target.
+EIGEN_EXCLUDE_FILES = ["Eigen/src/Core/arch/AVX/PacketMathGoogleTest.cc"]
+
+# Disallowed eigen modules/files in rNA:
+# * Using the custom STL and memory support, it is not needed and should
+#   not be used with c++17.
+# * We will only support the EulerAnglesZYX provided by //atg/geometry so
+#   just don't allow people to access the unsupported eigen module.
+EIGEN_DISALLOW_FILES = [
+    "Eigen/StlSupport/*.h",
+    "unsupported/Eigen/EulerAngles",
+    "unsupported/Eigen/src/EulerAngles/**",
+]
+
+# Files known to be under MPL2 license.
+EIGEN_MPL2_HEADER_FILES = glob(
+    EIGEN_FILES,
+    exclude = EIGEN_EXCLUDE_FILES +
+              EIGEN_RESTRICTED_FILES +
+              EIGEN_DISALLOW_FILES +
+              EIGEN_RESTRICTED_DEPS + [
+        # Guarantees any file missed by excludes above will not compile.
+        "Eigen/src/Core/util/NonMPL2.h",
+        "Eigen/**/CMakeLists.txt",
+    ],
+)
+
+cc_library(
+    name = "eigen",
+    hdrs = EIGEN_MPL2_HEADER_FILES,
+    defines = [
+        # This define (mostly) guarantees we don't link any problematic
+        # code. We use it, but we do not rely on it, as evidenced above.
+        "EIGEN_MPL2_ONLY",
+        "EIGEN_MAX_ALIGN_BYTES=64",
+    ],
+    includes = ["."],
+    visibility = ["//visibility:public"],
+)
--- a/third_party/fbgemm.BUILD
+++ b/third_party/fbgemm.BUILD
@ -0,0 +1,221 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+    name = "fbgemm_src_headers",
+    hdrs = [
+        "src/RefImplementations.h",
+    ],
+    include_prefix = "fbgemm",
+)
+
+cc_library(
+    name = "fbgemm_base",
+    srcs = [
+        "src/EmbeddingSpMDM.cc",
+        "src/EmbeddingSpMDMNBit.cc",
+        "src/ExecuteKernel.cc",
+        "src/ExecuteKernelU8S8.cc",
+        "src/Fbgemm.cc",
+        "src/FbgemmBfloat16Convert.cc",
+        "src/FbgemmConv.cc",
+        "src/FbgemmFP16.cc",
+        "src/FbgemmFloat16Convert.cc",
+        "src/FbgemmI64.cc",
+        "src/FbgemmI8Spmdm.cc",
+        "src/GenerateKernelU8S8S32ACC16.cc",
+        "src/GenerateKernelU8S8S32ACC16Avx512.cc",
+        "src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc",
+        "src/GenerateKernelU8S8S32ACC32.cc",
+        "src/GenerateKernelU8S8S32ACC32Avx512.cc",
+        "src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc",
+        "src/GroupwiseConvAcc32Avx2.cc",
+        "src/PackAMatrix.cc",
+        "src/PackAWithIm2Col.cc",
+        "src/PackBMatrix.cc",
+        "src/PackMatrix.cc",
+        "src/PackAWithQuantRowOffset.cc",
+        "src/PackAWithRowOffset.cc",
+        "src/PackWeightMatrixForGConv.cc",
+        "src/PackWeightsForConv.cc",
+        "src/QuantUtils.cc",
+        "src/RefImplementations.cc",
+        "src/RowWiseSparseAdagradFused.cc",
+        "src/SparseAdagrad.cc",
+        "src/Utils.cc",
+        # Private headers
+        "src/CodeCache.h",
+        "src/CodeGenHelpers.h",
+        "src/ExecuteKernel.h",
+        "src/ExecuteKernelGeneric.h",
+        "src/ExecuteKernelU8S8.h",
+        "src/FbgemmFP16Common.h",
+        "src/GenerateKernel.h",
+        "src/GroupwiseConv.h",
+        "src/RefImplementations.h",
+        "src/TransposeUtils.h",
+    ],
+    hdrs = [
+        "include/fbgemm/FbgemmConvert.h",
+        "include/fbgemm/FbgemmI64.h",
+    ],
+    includes = [
+        ".",
+        "src",
+    ],
+    deps = [
+        ":fbgemm_avx2",
+        ":fbgemm_avx512",
+        ":fbgemm_headers",
+        ":fbgemm_src_headers",
+        "@asmjit",
+        "@cpuinfo",
+    ],
+    linkstatic = 1,
+)
+
+# Builds src/FbgemmFloat16ConvertAvx2.cc separately from :fbgemm_avx2 (where
+# that source is commented out). This target depends on :fbgemm_base, which
+# in turn depends on :fbgemm_avx2.
+# NOTE(review): presumably split out to avoid a dependency cycle, as the
+# "_circular" suffix suggests — confirm.
+cc_library(
+    name = "fbgemm_avx2_circular",
+    srcs = [
+        "src/FbgemmFloat16ConvertAvx2.cc",
+    ],
+    copts = [
+        "-mavx2",
+        "-mf16c",
+    ],
+    deps = [
+        ":fbgemm_base",
+    ],
+    linkstatic = 1,
+)
+
+cc_library(
+    name = "fbgemm",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":fbgemm_base",
+        ":fbgemm_avx2_circular",
+    ],
+    linkstatic = 1,
+)
+
+cc_library(
+    name = "fbgemm_avx2",
+    srcs = [
+        "src/EmbeddingSpMDMAvx2.cc",
+        "src/FbgemmBfloat16ConvertAvx2.cc",
+        # "src/FbgemmFloat16ConvertAvx2.cc",
+        "src/FbgemmI8Depthwise3DAvx2.cc",
+        "src/FbgemmI8Depthwise3x3Avx2.cc",
+        "src/FbgemmI8DepthwiseAvx2.cc",
+        "src/FbgemmI8DepthwisePerChannelQuantAvx2.cc",
+        "src/OptimizedKernelsAvx2.cc",
+        "src/PackDepthwiseConvMatrixAvx2.cc",
+        "src/QuantUtilsAvx2.cc",
+        "src/UtilsAvx2.cc",
+        # Inline Assembly sources
+        "src/FbgemmFP16UKernelsAvx2.cc",
+        # Private headers
+        "src/FbgemmFP16Common.h",
+        "src/FbgemmFP16UKernelsAvx2.h",
+        "src/FbgemmI8Depthwise2DAvx2-inl.h",
+        "src/FbgemmI8DepthwiseAvx2-inl.h",
+        "src/MaskAvx2.h",
+        "src/OptimizedKernelsAvx2.h",
+        "src/TransposeUtils.h",
+        "src/TransposeUtilsAvx2.h",
+    ],
+    copts = [
+        "-m64",
+        "-mavx2",
+        "-mfma",
+        "-mf16c",
+        "-masm=intel",
+    ],
+    deps = [
+        ":fbgemm_headers",
+    ],
+    linkstatic = 1,
+)
+
+cc_library(
+    name = "fbgemm_avx2_headers",
+    includes = [
+        "src",
+    ],
+    hdrs = [
+        "src/FbgemmFP16UKernelsAvx2.h",
+        "src/MaskAvx2.h",
+        "src/OptimizedKernelsAvx2.h",
+    ],
+)
+
+cc_library(
+    name = "fbgemm_avx512",
+    srcs = [
+        "src/FbgemmBfloat16ConvertAvx512.cc",
+        "src/FbgemmFloat16ConvertAvx512.cc",
+        "src/UtilsAvx512.cc",
+        # Inline Assembly sources
+        "src/FbgemmFP16UKernelsAvx512.cc",
+        "src/FbgemmFP16UKernelsAvx512_256.cc",
+        # Private headers
+        "src/FbgemmFP16UKernelsAvx512.h",
+        "src/FbgemmFP16Common.h",
+        "src/MaskAvx2.h",
+        "src/TransposeUtils.h",
+        "src/TransposeUtilsAvx2.h",
+    ],
+    hdrs = [
+        "src/FbgemmFP16UKernelsAvx512_256.h",
+    ],
+    copts = [
+        "-m64",
+        "-mfma",
+        "-mavx512f",
+        "-mavx512bw",
+        "-mavx512dq",
+        "-mavx512vl",
+        "-masm=intel",
+    ],
+    deps = [
+        ":fbgemm_headers",
+    ],
+    linkstatic = 1,
+)
+
+cc_library(
+    name = "fbgemm_avx512_headers",
+    includes = [
+        "src",
+    ],
+    hdrs = [
+        "src/FbgemmFP16UKernelsAvx512.h",
+        "src/FbgemmFP16UKernelsAvx512_256.h",
+    ],
+)
+
+cc_library(
+    name = "fbgemm_headers",
+    hdrs = [
+        "include/fbgemm/ConvUtils.h",
+        "include/fbgemm/Fbgemm.h",
+        "include/fbgemm/FbgemmBuild.h",
+        "include/fbgemm/FbgemmConvert.h",
+        "include/fbgemm/FbgemmEmbedding.h",
+        "include/fbgemm/FbgemmFP16.h",
+        "include/fbgemm/FbgemmI64.h",
+        "include/fbgemm/FbgemmI8DepthwiseAvx2.h",
+        "include/fbgemm/FbgemmI8Spmdm.h",
+        "include/fbgemm/OutputProcessing-inl.h",
+        "include/fbgemm/PackingTraits-inl.h",
+        "include/fbgemm/QuantUtils.h",
+        "include/fbgemm/QuantUtilsAvx2.h",
+        "include/fbgemm/Types.h",
+        "include/fbgemm/Utils.h",
+        "include/fbgemm/UtilsAvx2.h",
+    ],
+    includes = [
+        "include",
+    ],
+    visibility = ["//visibility:public"],
+)
--- a/third_party/foxi.BUILD
+++ b/third_party/foxi.BUILD
@ -0,0 +1,16 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+cc_library(
+    name = "foxi",
+    srcs = [
+        "foxi/onnxifi_loader.c",
+    ],
+    hdrs = glob([
+        "foxi/*.h",
+    ]),
+    includes = [
+        ".",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
--- a/third_party/gloo.BUILD
+++ b/third_party/gloo.BUILD
@ -0,0 +1,85 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//tools/rules:cu.bzl", "cu_library")
+load("@//third_party:substitution.bzl", "template_rule")
+load("@//tools/config:defs.bzl", "if_cuda")
+
+# Generates gloo/config.h from gloo/config.h.in by plain text substitution,
+# standing in for CMake's configure step: the version is pinned to 0.5.0 and
+# every "#cmakedefine01 X" line is replaced by a fixed "#define X <0|1>".
+# Only CUDA and the TCP transport are switched on.
+# NOTE(review): GLOO_USE_CUDA is hard-coded to 1 regardless of the build
+# configuration, while the CUDA sources are only linked via if_cuda() --
+# confirm this is intended for CPU-only builds.
+template_rule(
+    name = "gloo_config_cmake_macros",
+    src = "gloo/config.h.in",
+    out = "gloo/config.h",
+    substitutions = {
+        "@GLOO_VERSION_MAJOR@": "0",
+        "@GLOO_VERSION_MINOR@": "5",
+        "@GLOO_VERSION_PATCH@": "0",
+        "cmakedefine01 GLOO_USE_CUDA": "define GLOO_USE_CUDA 1",
+        "cmakedefine01 GLOO_USE_NCCL": "define GLOO_USE_NCCL 0",
+        "cmakedefine01 GLOO_USE_ROCM": "define GLOO_USE_ROCM 0",
+        "cmakedefine01 GLOO_USE_RCCL": "define GLOO_USE_RCCL 0",
+        "cmakedefine01 GLOO_USE_REDIS": "define GLOO_USE_REDIS 0",
+        "cmakedefine01 GLOO_USE_IBVERBS": "define GLOO_USE_IBVERBS 0",
+        "cmakedefine01 GLOO_USE_MPI": "define GLOO_USE_MPI 0",
+        "cmakedefine01 GLOO_USE_AVX": "define GLOO_USE_AVX 0",
+        "cmakedefine01 GLOO_USE_LIBUV": "define GLOO_USE_LIBUV 0",
+        "cmakedefine01 GLOO_HAVE_TRANSPORT_TCP": "define GLOO_HAVE_TRANSPORT_TCP 1",
+        "cmakedefine01 GLOO_HAVE_TRANSPORT_IBVERBS": "define GLOO_HAVE_TRANSPORT_IBVERBS 0",
+        "cmakedefine01 GLOO_HAVE_TRANSPORT_UV": "define GLOO_HAVE_TRANSPORT_UV 0",
+    },
+)
+
+# Header-only target: the checked-in gloo headers (core, common, rendezvous,
+# transport and the TCP transport) plus the generated gloo/config.h.
+# redis_store.h is excluded, matching GLOO_USE_REDIS being defined to 0.
+# The repository root is on the include path ("gloo/<name>.h").
+cc_library(
+    name = "gloo_headers",
+    hdrs = glob(
+        [
+            "gloo/*.h",
+            "gloo/common/*.h",
+            "gloo/rendezvous/*.h",
+            "gloo/transport/*.h",
+            "gloo/transport/tcp/*.h",
+        ],
+        exclude = [
+            "gloo/rendezvous/redis_store.h",
+        ],
+    ) + ["gloo/config.h"],
+    includes = [
+        ".",
+    ],
+)
+
+# CUDA part of gloo, declared through the cu_library macro.
+# NOTE: tools/rules/cu.bzl currently defines cu_library as a stub that
+# declares nothing, so no ":gloo_cuda" target exists until that macro is
+# implemented.
+cu_library(
+    name = "gloo_cuda",
+    srcs = [
+        "gloo/cuda.cu.cc",
+        "gloo/cuda_private.cu.cc",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":gloo_headers",
+    ],
+    alwayslink = True,
+)
+
+# CPU build of gloo: every .cc under the core, common, rendezvous, transport
+# and TCP-transport directories, minus the CUDA sources (gloo/cuda*.cc) and
+# the redis store. ":gloo_cuda" is added to deps only when if_cuda() selects
+# the CUDA branch.
+cc_library(
+    name = "gloo",
+    srcs = glob(
+        [
+            "gloo/*.cc",
+            "gloo/common/*.cc",
+            "gloo/rendezvous/*.cc",
+            "gloo/transport/*.cc",
+            "gloo/transport/tcp/*.cc",
+        ],
+        exclude = [
+            "gloo/cuda*.cc",
+            "gloo/rendezvous/redis_store.cc",
+        ],
+    ),
+    # Exactly one language-standard flag: when several -std= options are
+    # given the compiler honours only the last one, so listing both
+    # gnu++11 and c++11 was contradictory and the former had no effect.
+    copts = [
+        "-std=c++11",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [":gloo_headers"] + if_cuda(
+        [":gloo_cuda"],
+        [],
+    ),
+)
--- a/third_party/ideep.BUILD
+++ b/third_party/ideep.BUILD
@ -0,0 +1,17 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# Header-only target (no srcs) exporting all .hpp/.h files under include/.
+# IDEEP_USE_MKL is set through `defines`, so it also applies to every target
+# that depends on this one. Depends on the mkl-dnn library.
+cc_library(
+    name = "ideep",
+    hdrs = glob([
+        "include/**/*.hpp",
+        "include/**/*.h",
+    ]),
+    defines = [
+        "IDEEP_USE_MKL",
+    ],
+    includes = [
+        "include/",
+    ],
+    visibility = ["//visibility:public"],
+    deps = ["@mkl_dnn//:mkl-dnn"],
+)
--- a/third_party/miniz-2.0.8/BUILD.bazel
+++ b/third_party/miniz-2.0.8/BUILD.bazel
@ -0,0 +1,10 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# miniz built from its single C source and public header.
+# cc_library is loaded explicitly from rules_cc, consistent with the other
+# third_party BUILD files, instead of relying on the native rule.
+cc_library(
+    name = "miniz",
+    srcs = [
+        "miniz.c",
+    ],
+    hdrs = [
+        "miniz.h",
+    ],
+    visibility = ["//visibility:public"],
+)
--- a/third_party/mkl-dnn.BUILD
+++ b/third_party/mkl-dnn.BUILD
@ -0,0 +1,89 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//third_party:substitution.bzl", "template_rule")
+
+# Generates include/dnnl_version.h from its .in template by text
+# substitution, pinning the version to 1.2.0 and the given commit hash.
+# These values are hard-coded here and must be kept in step with the
+# vendored source by hand.
+template_rule(
+    name = "include_dnnl_version",
+    src = "include/dnnl_version.h.in",
+    out = "include/dnnl_version.h",
+    substitutions = {
+        "@DNNL_VERSION_MAJOR@": "1",
+        "@DNNL_VERSION_MINOR@": "2",
+        "@DNNL_VERSION_PATCH@": "0",
+        "@DNNL_VERSION_HASH@": "70f8b879ea7a0c38caedb3320b7c85e8497ff50d",
+    },
+)
+
+# Generates include/dnnl_config.h from its .in template. Every occurrence of
+# "cmakedefine" becomes "define", and the runtime placeholders are filled in
+# with OMP for both CPU runtime variables and NONE for the GPU runtime.
+template_rule(
+    name = "include_dnnl_config",
+    src = "include/dnnl_config.h.in",
+    out = "include/dnnl_config.h",
+    substitutions = {
+        "cmakedefine": "define",
+        "${DNNL_CPU_THREADING_RUNTIME}": "OMP",
+        "${DNNL_CPU_RUNTIME}": "OMP",
+        "${DNNL_GPU_RUNTIME}": "NONE",
+    },
+)
+
+# The DNNL (mkl-dnn) CPU library: common and CPU sources, the public headers
+# under include/, the private headers under src/, and the two generated
+# headers dnnl_version.h and dnnl_config.h.
+#
+# With --define thread_sanitizer=1 the library is built with MKLDNN_THR=0 and
+# without the @tbb dependency; otherwise with MKLDNN_THR=2 and @tbb.
+# NOTE(review): the generated dnnl_config.h selects the OMP runtime and this
+# target passes -fopenmp / links -lgomp, yet the default branch also depends
+# on @tbb -- confirm which threading runtime is actually intended.
+cc_library(
+    name = "mkl-dnn",
+    srcs = glob([
+        "src/common/*.cpp",
+        "src/cpu/*.cpp",
+        "src/cpu/binary/*.cpp",
+        "src/cpu/gemm/*.cpp",
+        "src/cpu/gemm/bf16/*.cpp",
+        "src/cpu/gemm/f32/*.cpp",
+        "src/cpu/gemm/s8x8s32/*.cpp",
+        "src/cpu/jit_utils/*.cpp",
+        "src/cpu/jit_utils/jitprofiling/*.c",
+        "src/cpu/jit_utils/linux_perf/*.cpp",
+        "src/cpu/matmul/*.cpp",
+        "src/cpu/resampling/*.cpp",
+        "src/cpu/rnn/*.cpp",
+    ]),
+    # "src/cpu/**/*.hpp" is recursive and already covers every
+    # subdirectory of src/cpu (including rnn/), so no per-directory
+    # pattern is needed for those headers.
+    hdrs = glob([
+        "include/*.h",
+        "include/*.hpp",
+        "src/*.hpp",
+        "src/cpu/**/*.hpp",
+        "src/cpu/**/*.h",
+        "src/common/*.hpp",
+    ]) + [
+        "include/dnnl_version.h",
+        "include/dnnl_config.h",
+    ],
+    copts = [
+        "-DUSE_AVX",
+        "-DUSE_AVX2",
+        "-DDNNL_DLL",
+        "-DDNNL_DLL_EXPORTS",
+        "-DDNNL_ENABLE_CONCURRENT_EXEC",
+        "-DTH_BLAS_MKL",
+        "-D__STDC_CONSTANT_MACROS",
+        "-D__STDC_LIMIT_MACROS",
+        "-fno-strict-overflow",
+        "-fopenmp",
+    ] + select({
+        "@//tools/config:thread_sanitizer": ["-DMKLDNN_THR=0"],
+        "//conditions:default": ["-DMKLDNN_THR=2"],
+    }),
+    includes = [
+        "include/",
+        "src/",
+        "src/common/",
+        "src/cpu/",
+        "src/cpu/xbyak/",
+    ],
+    visibility = ["//visibility:public"],
+    linkopts = [
+        "-lgomp",
+    ],
+    deps = [
+        "@mkl",
+    ] + select({
+        "@//tools/config:thread_sanitizer": [],
+        "//conditions:default": ["@tbb"],
+    }),
+)
--- a/third_party/mkl.BUILD
+++ b/third_party/mkl.BUILD
@ -0,0 +1,21 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# Wraps the prebuilt MKL shared libraries found at the root of this
+# repository so that other targets can link against them. The TBB threading
+# layer (libmkl_tbb_thread.so) is left out when building with
+# --define thread_sanitizer=1. Headers come from the separate @mkl_headers
+# repository.
+cc_library(
+    name = "mkl",
+    srcs = [
+        "libmkl_avx2.so",
+        "libmkl_core.so",
+        "libmkl_def.so",
+        "libmkl_intel_lp64.so",
+        "libmkl_rt.so",
+        "libmkl_sequential.so",
+        "libmkl_vml_avx2.so",
+        "libmkl_vml_avx512.so",
+        "libmkl_vml_def.so",
+    ] + select({
+        "@//tools/config:thread_sanitizer": [],
+        "//conditions:default": ["libmkl_tbb_thread.so"],
+    }),
+    visibility = ["//visibility:public"],
+    deps = ["@mkl_headers"],
+)
--- a/third_party/mkl_headers.BUILD
+++ b/third_party/mkl_headers.BUILD
@ -0,0 +1,8 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# Header-only target exporting the .h files directly under include/ (the glob
+# is not recursive) with include/ on the include path.
+cc_library(
+    name = "mkl_headers",
+    hdrs = glob(["include/*.h"]),
+    includes = ["include/"],
+    visibility = ["//visibility:public"],
+)
--- a/third_party/onnx.BUILD
+++ b/third_party/onnx.BUILD
@ -0,0 +1,113 @@
+load("@rules_proto//proto:defs.bzl", "proto_library")
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_proto_library")
+load("@rules_python//python:defs.bzl", "py_binary")
+
+# Python tool used by the genrules in this file to turn the *.in.proto
+# templates into concrete .proto files and wrapper headers. The templates
+# are listed as `data` so they are available to the tool at run time.
+py_binary(
+    name = "gen_proto",
+    srcs = ["onnx/gen_proto.py"],
+    data = [
+        "onnx/onnx.in.proto",
+        "onnx/onnx-operators.in.proto",
+    ],
+)
+
+# Runs gen_proto for the "onnx" stem with package onnx_torch (-p) and the -m
+# flag, writing into $(@D)/onnx and discarding the tool's stdout. Afterwards
+# sed rewrites the generated onnx-ml.pb.h in place so that it refers to
+# "onnx/onnx_onnx_torch-ml.pb.h" instead of the bare file name.
+genrule(
+    name = "generate_onnx_proto",
+    outs = [
+        "onnx/onnx_onnx_torch-ml.proto",
+        "onnx/onnx-ml.pb.h",
+    ],
+    cmd = "$(location :gen_proto) -p onnx_torch -o $(@D)/onnx onnx -m >/dev/null && sed -i 's/onnx_onnx_torch-ml.pb.h/onnx\\/onnx_onnx_torch-ml.pb.h/g' $(@D)/onnx/onnx-ml.pb.h",
+    tools = [":gen_proto"],
+)
+
+# Same as generate_onnx_proto but for the "onnx-operators" stem: produces
+# onnx-operators_onnx_torch-ml.proto and onnx-operators-ml.pb.h, then uses
+# sed to prefix the referenced .pb.h name with "onnx/" inside the header.
+genrule(
+    name = "generate_onnx_operators_proto",
+    outs = [
+        "onnx/onnx-operators_onnx_torch-ml.proto",
+        "onnx/onnx-operators-ml.pb.h",
+    ],
+    cmd = "$(location :gen_proto) -p onnx_torch -o $(@D)/onnx onnx-operators -m >/dev/null && sed -i 's/onnx-operators_onnx_torch-ml.pb.h/onnx\\/onnx-operators_onnx_torch-ml.pb.h/g' $(@D)/onnx/onnx-operators-ml.pb.h",
+    tools = [":gen_proto"],
+)
+
+# The ONNX C++ library, built in the onnx_torch namespace with ONNX_ML
+# enabled. Sources are globbed per directory; the Python binding source
+# (cpp2py_export.cc) is excluded. The two generated wrapper headers are
+# added to hdrs explicitly because glob() only sees checked-in files.
+# `defines` (not copts) is used so that ONNX_ML and ONNX_NAMESPACE also
+# apply to every target that depends on this library.
+cc_library(
+    name = "onnx",
+    srcs = glob(
+        [
+            "onnx/*.cc",
+            "onnx/common/*.cc",
+            "onnx/defs/*.cc",
+            "onnx/defs/controlflow/*.cc",
+            "onnx/defs/experiments/*.cc",
+            "onnx/defs/generator/*.cc",
+            "onnx/defs/logical/*.cc",
+            "onnx/defs/math/*.cc",
+            "onnx/defs/nn/*.cc",
+            "onnx/defs/object_detection/*.cc",
+            "onnx/defs/quantization/*.cc",
+            "onnx/defs/reduction/*.cc",
+            "onnx/defs/rnn/*.cc",
+            "onnx/defs/sequence/*.cc",
+            "onnx/defs/tensor/*.cc",
+            "onnx/defs/traditionalml/*.cc",
+            "onnx/optimizer/*.cc",
+            "onnx/shape_inference/*.cc",
+            "onnx/version_converter/*.cc",
+        ],
+        exclude = [
+            "onnx/cpp2py_export.cc",
+        ],
+    ),
+    hdrs = glob([
+        "onnx/*.h",
+        "onnx/version_converter/*.h",
+        "onnx/common/*.h",
+        "onnx/defs/*.h",
+        "onnx/defs/tensor/*.h",
+        "onnx/shape_inference/*.h",
+        "onnx/optimizer/*.h",
+        "onnx/optimizer/passes/*.h",
+        "onnx/version_converter/adapters/*.h",
+    ]) + [
+        "onnx/onnx-ml.pb.h",
+        "onnx/onnx-operators-ml.pb.h",
+    ],
+    defines = [
+        "ONNX_ML=1",
+        "ONNX_NAMESPACE=onnx_torch",
+    ],
+    includes = [
+        ".",
+        "onnx/",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":onnx_proto_lib",
+    ],
+)
+
+# Header-only target exporting the checked-in onnx/*_pb.h wrapper headers
+# together with the generated protobuf C++ library.
+# NOTE(review): the pattern is "*_pb.h" (underscore), so it does not match
+# the generated "*.pb.h" files -- confirm that is intended.
+cc_library(
+    name = "onnx_proto_headers",
+    hdrs = glob([
+        "onnx/*_pb.h",
+    ]),
+    visibility = ["//visibility:public"],
+    deps = [
+        ":onnx_proto_lib",
+    ],
+)
+
+# Proto definitions for ONNX. Both sources are outputs of the
+# generate_onnx_*_proto genrules in this file, not checked-in files.
+proto_library(
+    name = "onnx_proto",
+    srcs = [
+        "onnx/onnx-operators_onnx_torch-ml.proto",
+        "onnx/onnx_onnx_torch-ml.proto",
+    ],
+)
+
+# C++ protobuf bindings generated from :onnx_proto.
+cc_proto_library(
+    name = "onnx_proto_lib",
+    deps = [":onnx_proto"],
+)
--- a/third_party/sleef.BUILD
+++ b/third_party/sleef.BUILD
@ -0,0 +1,494 @@
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
+load("@//third_party:sleef.bzl", "sleef_cc_library")
+
+SLEEF_COPTS = [
+    "-DHAVE_MALLOC_USABLE_SIZE=1",
+    "-DHAVE_MMAP=1",
+    "-DHAVE_SHM_OPEN=1",
+    "-DHAVE_SHM_UNLINK=1",
+    "-DIDEEP_USE_MKL",
+    "-DMKLDNN_THR=MKLDNN_THR_TBB",
+    "-DONNX_ML=1",
+    "-DONNX_NAMESPACE=onnx",
+    "-DTH_BLAS_MKL",
+    "-D_FILE_OFFSET_BITS=64",
+    "-ffp-contract=off",
+    "-fno-math-errno",
+    "-fno-trapping-math",
+    "-DCAFFE2_USE_GLOO",
+    "-DCUDA_HAS_FP16=1",
+    "-DHAVE_GCC_GET_CPUID",
+    "-DUSE_AVX",
+    "-DUSE_AVX2",
+    "-DTH_HAVE_THREAD",
+    "-std=gnu99",
+]
+
+SLEEF_COMMON_TARGET_COPTS = [
+    "-DSLEEF_STATIC_LIBS=1",
+    "-DENABLE_ALIAS=1",
+]
+
+SLEEF_PRIVATE_HEADERS = glob([
+    "build/include/*.h",
+    "src/arch/*.h",
+    "src/common/*.h",
+    "src/libm/*.h",
+    "src/libm/include/*.h",
+])
+
+SLEEF_PUBLIC_HEADERS = [
+    ":sleef_h",
+]
+
+SLEEF_PRIVATE_INCLUDES = [
+    "-Iexternal/sleef/src/arch",
+    "-Iexternal/sleef/src/common",
+]
+
+SLEEF_PUBLIC_INCLUDES = [
+    "build/include",
+]
+
+SLEEF_VISIBILITY = [
+    "@pytorch//:__subpackages__",
+]
+
+cc_binary(
+    name = "mkalias",
+    srcs = [
+        "src/libm/funcproto.h",
+        "src/libm/mkalias.c",
+    ],
+)
+
+# Produces alias_avx512f.h by running the mkalias tool twice (once per
+# argument set below) inside a shell group and redirecting the combined
+# stdout of both runs into the output file.
+genrule(
+    name = "alias_avx512f_h",
+    outs = ["alias_avx512f.h"],
+    cmd = "{ " + "; ".join([
+        "$(location :mkalias) -16 __m512 __m512i e avx512f",
+        "$(location :mkalias) 8 __m512d __m256i e avx512f",
+    ]) + "; } > $@",
+    tools = [":mkalias"],
+)
+
+cc_binary(
+    name = "mkdisp",
+    srcs = [
+        "src/libm/funcproto.h",
+        "src/libm/mkdisp.c",
+    ],
+    copts = SLEEF_COPTS,
+)
+
+# Produces dispavx.c: the checked-in prologue dispavx.c.org followed by the
+# stdout of mkdisp invoked with the argument list below.
+genrule(
+    name = "dispavx_c",
+    srcs = ["src/libm/dispavx.c.org"],
+    outs = ["dispavx.c"],
+    cmd = "{ cat $(location src/libm/dispavx.c.org); $(location :mkdisp) 4 8 __m256d __m256 __m128i avx fma4 avx2; } > $@",
+    tools = [":mkdisp"],
+)
+
+genrule(
+    name = "dispsse_c",
+    srcs = ["src/libm/dispsse.c.org"],
+    outs = ["dispsse.c"],
+    cmd = "{ cat $(location src/libm/dispsse.c.org); $(location :mkdisp) 2 4 __m128d __m128 __m128i sse2 sse4 avx2128; } > $@",
+    tools = [":mkdisp"],
+)
+
+cc_binary(
+    name = "mkrename",
+    srcs = [
+        "src/libm/funcproto.h",
+        "src/libm/mkrename.c",
+    ],
+)
+
+genrule(
+    name = "renameavx_h",
+    outs = ["renameavx.h"],
+    cmd = "$(location :mkrename) cinz_ 4 8 avx > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renameavx2_h",
+    outs = ["renameavx2.h"],
+    cmd = "$(location :mkrename) finz_ 4 8 avx2 > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renameavx2128_h",
+    outs = ["renameavx2128.h"],
+    cmd = "$(location :mkrename) finz_ 2 4 avx2128 > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renameavx512f_h",
+    outs = ["renameavx512f.h"],
+    cmd = "$(location :mkrename) finz_ 8 16 avx512f > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renameavx512fnofma_h",
+    outs = ["renameavx512fnofma.h"],
+    cmd = "$(location :mkrename) cinz_ 8 16 avx512fnofma > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renamefma4_h",
+    outs = ["renamefma4.h"],
+    cmd = "$(location :mkrename) finz_ 4 8 fma4 > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renamepurec_scalar_h",
+    outs = ["renamepurec_scalar.h"],
+    cmd = "$(location :mkrename) cinz_ 1 1 purec > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renamepurecfma_scalar_h",
+    outs = ["renamepurecfma_scalar.h"],
+    cmd = "$(location :mkrename) finz_ 1 1 purecfma > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renamesse2_h",
+    outs = ["renamesse2.h"],
+    cmd = "$(location :mkrename) cinz_ 2 4 sse2 > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "renamesse4_h",
+    outs = ["renamesse4.h"],
+    cmd = "$(location :mkrename) cinz_ 2 4 sse4 > $@",
+    tools = [":mkrename"],
+)
+
+genrule(
+    name = "sleef_h",
+    srcs = [
+        "src/libm/sleeflibm_header.h.org",
+        "src/libm/sleeflibm_footer.h.org",
+    ],
+    outs = ["build/include/sleef.h"],
+    cmd = "{ " + "; ".join([
+        "cat $(location src/libm/sleeflibm_header.h.org)",
+        "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__",
+        "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2",
+        "$(location :mkrename) cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4",
+        "$(location :mkrename) cinz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__",
+        "$(location :mkrename) cinz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__ avx",
+        "$(location :mkrename) finz_ 4 8 __m256d __m256 __m128i \"struct { __m128i x, y; }\" __AVX__ fma4",
+        "$(location :mkrename) finz_ 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2",
+        "$(location :mkrename) finz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128",
+        "$(location :mkrename) finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__",
+        "$(location :mkrename) finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512f",
+        "$(location :mkrename) cinz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512fnofma",
+        "$(location :mkrename) cinz_ 1 1 double float int32_t int32_t __STDC__ purec",
+        "$(location :mkrename) finz_ 1 1 double float int32_t int32_t FP_FAST_FMA purecfma",
+        "cat $(location src/libm/sleeflibm_footer.h.org)",
+    ]) + "; } > $@",
+    tools = [":mkrename"],
+)
+
+cc_library(
+    name = "sleef",
+    srcs = [
+        "src/libm/rempitab.c",
+        "src/libm/sleefdp.c",
+        "src/libm/sleefld.c",
+        "src/libm/sleefqp.c",
+        "src/libm/sleefsp.c",
+    ],
+    hdrs = SLEEF_PUBLIC_HEADERS,
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLEFLOAT128=1",
+        "-Wno-unused-result",
+    ],
+    includes = SLEEF_PUBLIC_INCLUDES,
+    # -lgcc resolves
+    # U __addtf3
+    # U __eqtf2
+    # U __fixtfdi
+    # U __floatditf
+    # U __gttf2
+    # U __lttf2
+    # U __multf3
+    # U __subtf3
+    # in bazel-bin/external/sleef/_objs/sleef/sleefqp.pic.o
+    linkopts = [
+        "-lgcc",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    # The purpose of the lists in deps is to keep related pairs of
+    # libraries together. In particular, each pair that contains a *det*
+    # library originates with a sleef_cc_library().
+    deps = [
+        ":common",
+        ":dispavx",
+        ":dispsse",
+    ] + [
+        ":sleefavx",
+        ":sleefdetavx",
+    ] + [
+        ":sleefavx2",
+        ":sleefdetavx2",
+    ] + [
+        ":sleefavx2128",
+        ":sleefdetavx2128",
+    ] + [
+        ":sleefavx512f",
+        ":sleefdetavx512f",
+    ] + [
+        ":sleefavx512fnofma",
+        ":sleefdetavx512fnofma",
+    ] + [
+        ":sleeffma4",
+        ":sleefdetfma4",
+    ] + [
+        ":sleefsse2",
+        ":sleefdetsse2",
+    ] + [
+        ":sleefsse4",
+        ":sleefdetsse4",
+    ] + [
+        ":sleefpurec_scalar",
+        ":sleefdetpurec_scalar",
+    ] + [
+        ":sleefpurecfma_scalar",
+        ":sleefdetpurecfma_scalar",
+    ],
+    alwayslink = True,
+)
+
+# Shared helper code (src/common/common.c). The private headers are listed
+# in srcs, so they are available to this compilation but are not exported
+# to dependents. Linked statically and always linked in.
+cc_library(
+    name = "common",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/common/common.c",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + [
+        "-Wno-unused-result",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+cc_library(
+    name = "dispavx",
+    srcs = SLEEF_PRIVATE_HEADERS + SLEEF_PUBLIC_HEADERS + [
+        ":dispavx_c",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DENABLE_AVX2=1",
+        "-DENABLE_FMA4=1",
+        "-mavx",
+    ],
+    includes = SLEEF_PUBLIC_INCLUDES,
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+cc_library(
+    name = "dispsse",
+    srcs = SLEEF_PRIVATE_HEADERS + SLEEF_PUBLIC_HEADERS + [
+        ":dispsse_c",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DENABLE_AVX2=1",
+        "-DENABLE_FMA4=1",
+        "-msse2",
+    ],
+    includes = SLEEF_PUBLIC_INCLUDES,
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+# AVX512F build of the SIMD double/single precision sources. The
+# sleef_cc_library macro (third_party/sleef.bzl) declares two libraries from
+# this one call: "sleefavx512f" and "sleefdetavx512f", the latter compiled
+# with -DDETERMINISTIC=1 in addition to the copts below.
+# The generated alias and rename headers are listed in srcs so the sources
+# can include them; ALIAS_NO_EXT_SUFFIX is passed as a quoted string macro
+# naming the alias header.
+sleef_cc_library(
+    name = "sleefavx512f",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":alias_avx512f_h",
+        ":renameavx512f_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DALIAS_NO_EXT_SUFFIX=\\\"alias_avx512f.h\\\"",
+        "-DENABLE_AVX512F=1",
+        "-mavx512f",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefavx512fnofma",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renameavx512fnofma_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_AVX512FNOFMA=1",
+        "-mavx512f",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefavx",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renameavx_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_AVX=1",
+        "-mavx",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefavx2",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renameavx2_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_AVX2=1",
+        "-mavx2",
+        "-mfma",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefavx2128",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renameavx2128_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_AVX2128=1",
+        "-mavx2",
+        "-mfma",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleeffma4",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamefma4_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_FMA4=1",
+        "-mfma4",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefsse2",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamesse2_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_SSE2=1",
+        "-msse2",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefsse4",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamesse4_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_SSE4=1",
+        "-msse4.1",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefpurec_scalar",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamepurec_scalar_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_PUREC_SCALAR=1",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
+
+sleef_cc_library(
+    name = "sleefpurecfma_scalar",
+    srcs = SLEEF_PRIVATE_HEADERS + [
+        "src/libm/sleefsimddp.c",
+        "src/libm/sleefsimdsp.c",
+        ":renamepurecfma_scalar_h",
+    ],
+    copts = SLEEF_PRIVATE_INCLUDES + SLEEF_COPTS + SLEEF_COMMON_TARGET_COPTS + [
+        "-DDORENAME=1",
+        "-DENABLE_PURECFMA_SCALAR=1",
+        "-mavx2",
+        "-mfma",
+    ],
+    linkstatic = True,
+    visibility = SLEEF_VISIBILITY,
+    alwayslink = True,
+)
--- a/third_party/sleef.bzl
+++ b/third_party/sleef.bzl
@ -0,0 +1,22 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+# This macro provides for generating both "sleef<foo>" and
+# "sleefdet<foo>" libraries for a given set of code. The difference is
+# that the "det" libraries get compiled with "-DDETERMINISTIC=1".
+
+def sleef_cc_library(name, copts, **kwargs):
+    """Declares a cc_library plus its deterministic ("det") twin.
+
+    Two targets are created from the same attributes:
+      * `name`, compiled with `copts`;
+      * `name` with "det" inserted after the leading "sleef" (for example
+        "sleefavx2" -> "sleefdetavx2"), compiled with `copts` plus
+        "-DDETERMINISTIC=1".
+
+    Args:
+      name: Target name; must start with "sleef".
+      copts: Compiler options shared by both libraries.
+      **kwargs: Remaining cc_library attributes, forwarded unchanged to both.
+    """
+    prefix = "sleef"
+
+    # Validate before declaring anything, so that a bad name is reported
+    # without first creating a half-finished pair of targets.
+    if not name.startswith(prefix):
+        fail("name {} does not start with {}".format(repr(name), repr(prefix)))
+
+    cc_library(
+        name = name,
+        copts = copts,
+        **kwargs
+    )
+
+    cc_library(
+        # Only the first occurrence of the prefix is rewritten.
+        name = name.replace(prefix, prefix + "det", 1),
+        copts = copts + ["-DDETERMINISTIC=1"],
+        **kwargs
+    )
--- a/third_party/substitution.bzl
+++ b/third_party/substitution.bzl
@ -0,0 +1,44 @@
+# This Bazel rules file is derived from https://github.com/tensorflow/tensorflow/blob/master/third_party/common.bzl
+
+# Rule for simple expansion of template files. This performs a simple
+# search over the template file for the keys in substitutions,
+# and replaces them with the corresponding values.
+#
+# Typical usage:
+#   load("//third_party:substitution.bzl", "template_rule")
+#   template_rule(
+#       name = "ExpandMyTemplate",
+#       src = "my.template",
+#       out = "my.txt",
+#       substitutions = {
+#         "$VAR1": "foo",
+#         "$VAR2": "bar",
+#       }
+#   )
+#
+# Args:
+#   name: The name of the rule.
+#   src: The template file to expand
+#   out: The destination of the expanded file
+#   substitutions: A dictionary mapping strings to their substitutions
+
+def template_rule_impl(ctx):
+    """Expands the `src` template into `out`, applying `substitutions`."""
+    template_file = ctx.file.src
+    expanded_file = ctx.outputs.out
+    replacements = ctx.attr.substitutions
+
+    ctx.actions.expand_template(
+        template = template_file,
+        output = expanded_file,
+        substitutions = replacements,
+    )
+
+# Rule declaration for template expansion.
+#   src: the single template file (mandatory).
+#   out: the file to generate (mandatory).
+#   substitutions: literal search string -> replacement string (mandatory).
+template_rule = rule(
+    attrs = {
+        "src": attr.label(
+            mandatory = True,
+            allow_single_file = True,
+        ),
+        "out": attr.output(mandatory = True),
+        "substitutions": attr.string_dict(mandatory = True),
+    },
+    # output_to_genfiles is required for header files.
+    output_to_genfiles = True,
+    implementation = template_rule_impl,
+)
--- a/third_party/tbb.BUILD
+++ b/third_party/tbb.BUILD
@ -0,0 +1,75 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+load("@//third_party:substitution.bzl", "template_rule")
+
+licenses(["notice"])  # Apache 2.0
+
+# Generates version_string.h from the version_string.ver.in template that
+# lives in the main repository ("@//"), filling every build-environment
+# placeholder with the fixed text "Unknown". third_party/tbb.patch changes
+# the TBB sources to include "version_string.h" instead of
+# "version_string.ver".
+template_rule(
+    name = "version_string",
+    src = "@//:aten/src/ATen/cpu/tbb/extra/version_string.ver.in",
+    out = "version_string.h",
+    substitutions = {
+        "@CMAKE_SYSTEM_NAME@": "Unknown",
+        "@CMAKE_SYSTEM@": "Unknown",
+        "@CMAKE_SYSTEM_VERSION@": "Unknown",
+        "@CMAKE_CXX_COMPILER_ID@": "Unknown",
+        "@_configure_date@": "Unknown",
+    }
+)
+
+# The TBB library built from source.
+#   srcs: the generated version header, the private headers and .cpp files
+#         under src/ (excluding src/old/test_*.cpp), and rml_tbb.cpp.
+#   hdrs: everything under include/tbb and its listed subdirectories except
+#         scalable_allocator.h.
+cc_library(
+    name = "tbb",
+    srcs = [":version_string"] + glob(
+        [
+            "src/old/*.h",
+            "src/rml/client/*.h",
+            "src/rml/include/*.h",
+            "src/rml/server/*.h",
+            "src/tbb/*.h",
+            "src/tbb/tools_api/*.h",
+            "src/tbb/tools_api/legacy/*.h",
+            "src/old/*.cpp",
+            "src/tbb/*.cpp",
+        ],
+        exclude = ["src/old/test_*.cpp"],
+    ) + ["src/rml/client/rml_tbb.cpp"],
+    hdrs = glob(
+        [
+            "include/tbb/*",
+            "include/tbb/compat/*",
+            "include/tbb/internal/*",
+            "include/tbb/machine/*",
+        ],
+        exclude = ["include/tbb/scalable_allocator.h"],
+    ),
+    # The -I paths are spelled relative to the execution root and therefore
+    # hard-code the external repository name "tbb". Being copts, they apply
+    # to this target only and are not propagated to dependents.
+    copts = [
+        "-Iexternal/tbb/src/rml/include",
+        "-Iexternal/tbb/src",
+        "-pthread",
+        "-DDO_ITT_NOTIFY=1",
+        "-DUSE_PTHREAD=1",
+        "-D__TBB_BUILD=1",
+        "-D__TBB_DYNAMIC_LOAD_ENABLED=0",
+        "-D__TBB_SOURCE_DIRECTLY_INCLUDED=1",
+        "-fno-sanitize=vptr",
+        "-fno-sanitize=thread",
+    ],
+    # `defines` are propagated to every target depending on :tbb.
+    defines = [
+        # TBB Cannot detect the standard library version when using clang with libstdc++.
+        # See https://github.com/01org/tbb/issues/22
+        "TBB_USE_GLIBCXX_VERSION=(_GLIBCXX_RELEASE*10000)",
+        "TBB_PREVIEW_GLOBAL_CONTROL=1",
+        "TBB_PREVIEW_LOCAL_OBSERVER=1",
+        "__TBB_ALLOW_MUTABLE_FUNCTORS=1",
+    ],
+    includes = [
+        "include",
+        "src/tbb/tools_api",
+    ],
+    linkopts = [
+        "-ldl",
+        "-lpthread",
+        "-lrt",
+    ],
+    # Listed as a textual header so it is not compiled as a translation unit
+    # of its own; presumably another source #includes it -- TODO confirm.
+    textual_hdrs = ["src/tbb/tools_api/ittnotify_static.c"],
+    visibility = ["//visibility:public"],
+)
--- a/third_party/tbb.patch
+++ b/third_party/tbb.patch
@ -0,0 +1,34 @@
+diff --git a/src/rml/server/rml_server.cpp b/src/rml/server/rml_server.cpp
+index 2508465..1e22ad2 100644
+--- a/src/rml/server/rml_server.cpp
+++ b/src/rml/server/rml_server.cpp
+@@ -3279,10 +3279,10 @@ extern "C" void __KMP_call_with_my_server_info( ::rml::server_info_callback_t cb
+ /*
+  * RML server info
+  */
+-#include "version_string.ver"
++#include "version_string.h"
+ 
+ #ifndef __TBB_VERSION_STRINGS
+-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
++#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
+ #endif
+ 
+ // We use the build time as the RML server info. TBB is required to build RML, so we make it the same as the TBB build time.
+diff --git a/src/tbb/tbb_version.h b/src/tbb/tbb_version.h
+index dcaa55b..4981a8a 100644
+--- a/src/tbb/tbb_version.h
+++ b/src/tbb/tbb_version.h
+@@ -25,10 +25,10 @@
+ #ifndef ENDL
+ #define ENDL "\n"
+ #endif
+-#include "version_string.ver"
++#include "version_string.h"
+ 
+ #ifndef __TBB_VERSION_STRINGS
+-#pragma message("Warning: version_string.ver isn't generated properly by version_info.sh script!")
++#pragma message("Warning: version_string.h isn't generated properly by version_info.sh script!")
+ // here is an example of macros value:
+ #define __TBB_VERSION_STRINGS \
+ "TBB: BUILD_HOST\tUnknown\n" \
--- a/tools/config/BUILD
+++ b/tools/config/BUILD
@ -0,0 +1,42 @@
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+# Matches builds invoked with --define cuda=true.
+config_setting(
+    name = "cuda",
+    define_values = {
+        "cuda": "true",
+    },
+)
+
+# Even when building with --config=cuda, host targets should be built with cuda disabled
+# as these targets will run on CI machines that have no GPUs.
+selects.config_setting_group(
+    name = "cuda_enabled_and_capable",
+    # Both conditions must hold: CUDA was requested via :cuda AND the
+    # toolchain reports itself as CUDA capable.
+    match_all = [
+        ":cuda",
+        "//tools/toolchain:is_cuda_capable",
+    ],
+)
+
+# Configures the system to build with cuda using clang.
+config_setting(
+    name = "cuda_clang",
+    define_values = {
+        "cuda_clang": "true",
+    },
+)
+
+# Indicates that cuda code should be compiled with nvcc
+# Mostly exists to support _analysis_ of tensorflow; more work is needed to actually make this
+# setting work.
+config_setting(
+    name = "cuda_nvcc",
+    define_values = {
+        "cuda_nvcc": "true",
+    },
+)
+
+# Matches builds invoked with --define thread_sanitizer=1. Public because the
+# third-party BUILD files (e.g. mkl, mkl-dnn) select on it from their own
+# repositories.
+config_setting(
+    name = "thread_sanitizer",
+    define_values = {"thread_sanitizer": "1"},
+    visibility = ["//visibility:public"],
+)
--- a/tools/config/defs.bzl
+++ b/tools/config/defs.bzl
@ -0,0 +1,65 @@
+"""
+ Macros for selecting with / without various GPU libraries.  Most of these are meant to be used
+ directly by tensorflow in place of their build's own configure.py + bazel-gen system.
+"""
+
+load("@bazel_skylib//lib:selects.bzl", "selects")
+
+def if_cuda(if_true, if_false = []):
+    """Returns a select() yielding `if_true` for CUDA builds, else `if_false`.
+
+    "CUDA build" means the cuda_enabled_and_capable setting in
+    //tools/config matches.
+    """
+    branches = {
+        "@//tools/config:cuda_enabled_and_capable": if_true,
+        "//conditions:default": if_false,
+    }
+    return selects.with_or(branches)
+
+def if_tensorrt(if_true, if_false = []):
+    """Helper for selecting based on whether TensorRT is configured.
+
+    No TensorRT condition is wired up, so `if_true` is never chosen and the
+    returned select() always resolves to `if_false`.
+    """
+    default_only = {"//conditions:default": if_false}
+    return select(default_only)
+
+def if_rocm(if_true, if_false = []):
+    """Helper for selecting based on whether ROCm is configured.
+
+    No ROCm condition is wired up, so `if_true` is never chosen and the
+    returned select() always resolves to `if_false`.
+    """
+    default_only = {"//conditions:default": if_false}
+    return select(default_only)
+
+def if_sycl(if_true, if_false = []):
+    """Helper for selecting based on whether SYCL/ComputeCPP is configured.
+
+    No SYCL condition is wired up, so `if_true` is never chosen and the
+    returned select() always resolves to `if_false`.
+    """
+
+    # NOTE: TensorFlow expects some strange behavior (see their if_sycl) if
+    # we actually plan on supporting this at some point.
+    default_only = {"//conditions:default": if_false}
+    return select(default_only)
+
+def if_ccpp(if_true, if_false = []):
+    """Helper for selecting based on whether ComputeCPP is configured.
+
+    No ComputeCPP condition is wired up, so `if_true` is never chosen and
+    the returned select() always resolves to `if_false`.
+    """
+    default_only = {"//conditions:default": if_false}
+    return select(default_only)
+
+def cuda_default_copts():
+    """Returns the compiler options added for CUDA builds (none otherwise)."""
+    cuda_only_copts = ["-DGOOGLE_CUDA=1"]
+    return if_cuda(cuda_only_copts)
+
+def cuda_default_features():
+    """Returns the `features` entries applied for CUDA builds (none otherwise)."""
+    cuda_only_features = [
+        "-per_object_debug_info",
+        "-use_header_modules",
+        "cuda_clang",
+    ]
+    return if_cuda(cuda_only_features)
+
+def rocm_default_copts():
+    """Returns the compiler options for ROCm builds, wrapped in if_rocm()."""
+    rocm_only_copts = ["-x", "rocm"]
+    return if_rocm(rocm_only_copts)
+
+def rocm_copts(opts = []):
+    """Returns the default ROCm copts followed by `opts`, both ROCm-only."""
+    defaults = rocm_default_copts()
+    extras = if_rocm(opts)
+    return defaults + extras
+
+def cuda_is_configured():
+    # Always reports False; this is a plain bool, not a select().
+    #
+    # FIXME(dcollins): currently only used by tensorflow's xla stuff, which we aren't building.  However bazel
+    # query hits it so this needs to be defined.  Because bazel doesn't actually resolve config at macro expansion
+    # time, `select` can't be used here (since xla expects lists of strings and not lists of select objects).
+    # Instead, the xla build rules must be rewritten to use `if_cuda_is_configured`
+    return False
+
+def if_cuda_is_configured(x):
+    """Returns a select() yielding `x` for CUDA builds and [] otherwise."""
+    return if_cuda(x, if_false = [])
+
+def if_rocm_is_configured(x):
+    """Returns if_rocm(x, []): `x` under ROCm, [] otherwise."""
+    return if_rocm(x, if_false = [])
--- a/tools/rules/BUILD
+++ b/tools/rules/BUILD
--- a/tools/rules/cu.bzl
+++ b/tools/rules/cu.bzl
@ -0,0 +1,3 @@
+# gpu support is not available
+def cu_library(**kwargs):
+  # Placeholder macro: accepts any attributes and declares no target, so
+  # BUILD files that call cu_library still load. Labels that refer to a
+  # target "created" by this macro do not exist.
+  pass
--- a/tools/rules/workspace.bzl
+++ b/tools/rules/workspace.bzl
@ -0,0 +1,29 @@
+def _impl(repository_ctx):
+  """Materializes a patched copy of the "<name>_unpatched" repository.
+
+  Steps: archive the unpatched repository, extract the archive into this
+  repository, apply each file in `patches` with `patch_strip`, then install
+  `build_file` as BUILD.bazel. Any failing external command aborts the
+  fetch instead of leaving a silently incomplete repository.
+  """
+  archive = repository_ctx.attr.name + ".tar"
+
+  # The unpatched repository is located through its top-level README, so
+  # that file must exist there.
+  reference = Label("@%s_unpatched//:README" % repository_ctx.attr.name)
+  dirname = repository_ctx.path(reference).dirname
+
+  # "h" makes tar follow symlinks, so file contents are archived rather than
+  # links (presumably because the local repository is a tree of symlinks).
+  tar_result = repository_ctx.execute(["tar", "hcf", archive, "-C", dirname, "."])
+  if tar_result.return_code != 0:
+    fail("Failed to archive %s (exit code %d): %s" % (
+        dirname,
+        tar_result.return_code,
+        tar_result.stderr,
+    ))
+  repository_ctx.extract(archive)
+
+  for patch in repository_ctx.attr.patches:
+    repository_ctx.patch(repository_ctx.path(patch), repository_ctx.attr.patch_strip)
+
+  build_file = repository_ctx.path(repository_ctx.attr.build_file)
+  cp_result = repository_ctx.execute(["cp", build_file, "BUILD.bazel"])
+  if cp_result.return_code != 0:
+    fail("Failed to copy %s to BUILD.bazel (exit code %d): %s" % (
+        build_file,
+        cp_result.return_code,
+        cp_result.stderr,
+    ))
+
+# Repository rule backing new_patched_local_repository.
+#   patches: labels of patch files, applied in the listed order.
+#   patch_strip: strip count handed to repository_ctx.patch for each patch.
+#   build_file: label of the file copied into the repository as BUILD.bazel.
+_patched_rule = repository_rule(
+    implementation = _impl,
+    attrs = {
+        "patches": attr.label_list(),
+        "patch_strip": attr.int(),
+        "build_file": attr.label(),
+    },
+)
+
+def new_patched_local_repository(name, path, **kwargs):
+  # Declares two repositories:
+  #   "<name>_unpatched": a new_local_repository pointing at `path`, with the
+  #       inline BUILD content below;
+  #   "<name>": the patched copy created by _patched_rule, which receives the
+  #       remaining keyword arguments (patches, patch_strip, build_file).
+  # _impl locates the unpatched tree through "@<name>_unpatched//:README", so
+  # `path` must contain a top-level README file.
+  native.new_local_repository(
+      name = name + "_unpatched",
+      build_file_content = """
+pkg_tar(name = "content", srcs = glob(["**"]))
+""",
+      path = path,
+  )
+  _patched_rule(name = name, **kwargs)