[Bazel] Add CUDA build to CI (#66241)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/35316
On master, the Bazel CUDA build is disabled because there is no proper `cu_library` rule. This PR:
- Adds `rules_cuda` to the WORKSPACE and forwards `cu_library` to `rules_cuda` (a sketch of the forwarding shim appears below).
- Uses simple local CUDA and cuDNN repositories (adopted from TRTorch) for CUDA 11.3.
- Fixes the currently broken CUDA build.
- Enables the CUDA build in CI, not just for the `:torch` target but for all the test binaries, to catch undefined symbols.
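The forwarding shim in `tools/rules/cu.bzl` is not shown in this diff; a minimal sketch of the idea, assuming `rules_cuda` exposes `cuda_library` from `@rules_cuda//cuda:defs.bzl` and that no extra copts are required, looks like:

    load("@rules_cuda//cuda:defs.bzl", "cuda_library")

    def cu_library(name, srcs, copts = [], **kwargs):
        # Hand .cu sources to rules_cuda's cuda_library so they are compiled
        # with the configured CUDA compiler (nvcc, per the .bazelrc change below).
        cuda_library(name = name, srcs = srcs, copts = copts, **kwargs)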

Pull Request resolved: https://github.com/pytorch/pytorch/pull/66241

Reviewed By: ejguan

Differential Revision: D31544091

Pulled By: malfet

fbshipit-source-id: fd3c34d0e8f80fee06f015694a4c13a8e9e12206
Author: Thuyen Ngo
Date: 2021-12-17 13:41:24 -08:00
Committed by: Facebook GitHub Bot
Parent: e0f4e28c69
Commit: e35bf56461
17 changed files with 767 additions and 828 deletions


@@ -1,6 +1,7 @@
 build --copt=--std=c++14
 build --copt=-I.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
+build --experimental_ui_max_stdouterr_bytes=2048576

 # Configuration to disable tty features for environments like CI
 build:no-tty --curses no
@@ -11,3 +12,8 @@ build:no-tty --show_progress_rate_limit 10
 build:gpu --define=cuda=true
 # define a separate build folder for faster switching between configs
 build:gpu --platform_suffix=-gpu
+# rules_cuda configuration
+build:gpu --@rules_cuda//cuda:enable_cuda
+build:gpu --@rules_cuda//cuda:cuda_targets=sm_52
+build:gpu --@rules_cuda//cuda:compiler=nvcc
+build:gpu --repo_env=CUDA_PATH=/usr/local/cuda
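The `--define=cuda=true` line above is what the `if_cuda` helper loaded from `//tools/config:defs.bzl` keys off of elsewhere in this diff. A plausible sketch of that helper (the file itself is not part of this change, and the config_setting label is an assumption):

    # tools/config/defs.bzl (sketch, not part of this diff)
    def if_cuda(if_true, if_false = []):
        # Select the CUDA branch when the build sets --define=cuda=true.
        return select({
            "//tools/config:cuda": if_true,  # assumed config_setting matching {"cuda": "true"}
            "//conditions:default": if_false,
        })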


@@ -20,13 +20,13 @@
     "linux-docs-push",
     "linux-vulkan-bionic-py3.6-clang9",
     "linux-xenial-cuda11.3-py3.6-gcc7",
+    "linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
     "linux-xenial-py3-clang5-mobile-build",
     "linux-xenial-py3-clang5-mobile-custom-build-static",
     "linux-xenial-py3.6-clang7-asan",
     "linux-xenial-py3.6-clang7-onnx",
     "linux-xenial-py3.6-gcc5.4",
     "linux-xenial-py3.6-gcc7",
-    "linux-xenial-py3.6-gcc7-bazel-test",
     "macos-10-15-py3-arm64",
     "macos-10-15-py3-lite-interpreter-x86-64",
     "macos-11-py3-x86-64",
@@ -48,7 +48,7 @@
     "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit"
   ],
   "ciflow/bazel": [
-    "linux-xenial-py3.6-gcc7-bazel-test"
+    "linux-xenial-cuda11.3-py3.6-gcc7-bazel-test"
   ],
   "ciflow/cpu": [
     "caffe2-linux-xenial-py3.6-gcc5.4",
@@ -56,11 +56,11 @@
     "linux-docs",
     "linux-docs-push",
     "linux-vulkan-bionic-py3.6-clang9",
+    "linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
     "linux-xenial-py3.6-clang7-asan",
     "linux-xenial-py3.6-clang7-onnx",
     "linux-xenial-py3.6-gcc5.4",
     "linux-xenial-py3.6-gcc7",
-    "linux-xenial-py3.6-gcc7-bazel-test",
     "parallelnative-linux-xenial-py3.6-gcc5.4",
     "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
     "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
@@ -85,13 +85,13 @@
     "linux-docs",
     "linux-vulkan-bionic-py3.6-clang9",
     "linux-xenial-cuda11.3-py3.6-gcc7",
+    "linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
     "linux-xenial-py3-clang5-mobile-build",
     "linux-xenial-py3-clang5-mobile-custom-build-static",
     "linux-xenial-py3.6-clang7-asan",
     "linux-xenial-py3.6-clang7-onnx",
     "linux-xenial-py3.6-gcc5.4",
     "linux-xenial-py3.6-gcc7",
-    "linux-xenial-py3.6-gcc7-bazel-test",
     "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
     "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
     "win-vs2019-cpu-py3",
@@ -126,13 +126,13 @@
     "linux-docs-push",
     "linux-vulkan-bionic-py3.6-clang9",
     "linux-xenial-cuda11.3-py3.6-gcc7",
+    "linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
     "linux-xenial-py3-clang5-mobile-build",
     "linux-xenial-py3-clang5-mobile-custom-build-static",
     "linux-xenial-py3.6-clang7-asan",
     "linux-xenial-py3.6-clang7-onnx",
     "linux-xenial-py3.6-gcc5.4",
     "linux-xenial-py3.6-gcc7",
-    "linux-xenial-py3.6-gcc7-bazel-test",
     "parallelnative-linux-xenial-py3.6-gcc5.4",
     "periodic-libtorch-linux-bionic-cuda11.5-py3.6-gcc7",
     "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
@@ -203,13 +203,13 @@
     "linux-docs",
     "linux-vulkan-bionic-py3.6-clang9",
     "linux-xenial-cuda11.3-py3.6-gcc7",
+    "linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
     "linux-xenial-py3-clang5-mobile-build",
     "linux-xenial-py3-clang5-mobile-custom-build-static",
     "linux-xenial-py3.6-clang7-asan",
     "linux-xenial-py3.6-clang7-onnx",
     "linux-xenial-py3.6-gcc5.4",
     "linux-xenial-py3.6-gcc7",
-    "linux-xenial-py3.6-gcc7-bazel-test",
     "macos-10-15-py3-arm64",
     "macos-10-15-py3-lite-interpreter-x86-64",
     "macos-11-py3-x86-64",


@@ -609,8 +609,8 @@ ANDROID_WORKFLOWS = [
 BAZEL_WORKFLOWS = [
     CIWorkflow(
         arch="linux",
-        build_environment="linux-xenial-py3.6-gcc7-bazel-test",
-        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
+        build_environment="linux-xenial-cuda11.3-py3.6-gcc7-bazel-test",
+        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
         test_runner_type=LINUX_CPU_TEST_RUNNER,
         ciflow_config=CIFlowConfig(
             labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_BAZEL, LABEL_CIFLOW_CPU, LABEL_CIFLOW_LINUX},


@ -0,0 +1,331 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/bazel_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
- fbsync
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
IS_GHA: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AWS_DEFAULT_REGION: us-east-1
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
concurrency:
group: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: linux.large
timeout-minutes: 240
env:
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/bazel') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') }}
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
if: ${{ (github.repository == 'pytorch/pytorch') && (
(github.event_name == 'push') ||
(github.event_name == 'schedule') ||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/bazel') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux')) || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') ||
((github.event_name == 'pull_request' && github.event.action != 'unassigned') && !contains(join(github.event.pull_request.labels.*.name), 'ciflow/')))
}}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
# building and testing in a single job since bazel runs only small subset of tests
build-and-test:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test-build-and-test
NUM_TEST_SHARDS: 1
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_SKIP_S3_UPLOAD: 1
working-directory: .circleci/docker
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
./build_docker.sh
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- name: Output disk space left
run: |
sudo df -H
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e PR_LABELS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Test
# Time out the test phase after 3.5 hours
timeout-minutes: 210
run: |
# detached container should get cleaned up by teardown_ec2_linux
export SHARD_NUMBER=0
# TODO: Stop building test binaries as part of the build phase
# Make sure we copy test results from bazel-testlogs symlink to
# a regular directory ./test/test-reports
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e SHARD_NUMBER \
-e NUM_TEST_SHARDS \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: 'bazel-${{ github.job }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Zip JSONs for upload
if: always()
env:
FILE_SUFFIX: 'bazel-${{ github.job }}'
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Downloaded JSONs on S3
if: always()
with:
retention-days: 14
if-no-files-found: warn
path:
test-jsons-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-bazel-test-test
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.19.12
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af


@@ -207,11 +207,10 @@ if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
   get_bazel

-  # first build the whole torch for CPU-only
+  # first build torch for CPU-only
   tools/bazel build --config=no-tty :torch

-  # then build selected set of targets with GPU-support.
-  # TODO: eventually this should converge to building the whole :torch with GPU-support
-  tools/bazel build --config=no-tty --config=gpu //c10
+  # then build everything with CUDA
+  tools/bazel build --config=no-tty --config=gpu :all
 else
   # check that setup.py would fail with bad arguments
   echo "The next three invocations are expected to fail with invalid command error messages."


@@ -3,7 +3,7 @@ load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
 load("@rules_proto//proto:defs.bzl", "proto_library")
 load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_proto_library", "cc_test")
 load("//third_party:substitution.bzl", "header_template_rule")
-load("//:tools/build_variables.bzl", "torch_cpp_srcs", "libtorch_python_core_sources", "libtorch_core_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "jit_core_sources")
+load("//:tools/build_variables.bzl", "jit_core_sources", "libtorch_core_sources", "libtorch_cuda_sources", "libtorch_distributed_sources", "libtorch_extra_sources", "libtorch_nvfuser_generated_headers", "libtorch_nvfuser_runtime_sources", "libtorch_python_core_sources", "torch_cpp_srcs")
 load("//tools/rules:cu.bzl", "cu_library")
 load("//tools/config:defs.bzl", "if_cuda")
 load("//:aten.bzl", "intern_build_aten_ops", "generate_aten")
@@ -15,6 +15,7 @@ COMMON_COPTS = [
     "-DHAVE_SHM_UNLINK=1",
     "-D_FILE_OFFSET_BITS=64",
     "-DHAVE_GCC_GET_CPUID",
+    "-DTH_BLAS_MKL",
     "-DUSE_GCC_GET_CPUID",
     "-DTH_HAVE_THREAD",
     "-DUSE_FBGEMM",
@@ -37,11 +38,11 @@ py_binary(
     ],
 )

+aten_generation_srcs = ["aten/src/ATen/native/native_functions.yaml"] + glob(["aten/src/ATen/templates/**"])
+
 generate_aten(
     name = "generated_cpp",
-    srcs = [
-        "aten/src/ATen/native/native_functions.yaml",
-    ] + glob(["aten/src/ATen/templates/**"]),
+    srcs = aten_generation_srcs,
     outs = [
         "aten/src/ATen/Declarations.yaml",
         "aten/src/ATen/RegisterBackendSelect.cpp",
@@ -62,8 +63,6 @@ generate_aten(
         "aten/src/ATen/RegisterSchema.cpp",
         "aten/src/ATen/CPUFunctions.h",
         "aten/src/ATen/CPUFunctions_inl.h",
-        "aten/src/ATen/CUDAFunctions.h",
-        "aten/src/ATen/CUDAFunctions_inl.h",
         "aten/src/ATen/CompositeExplicitAutogradFunctions.h",
         "aten/src/ATen/CompositeExplicitAutogradFunctions_inl.h",
         "aten/src/ATen/CompositeImplicitAutogradFunctions.h",
@@ -82,6 +81,8 @@ generate_aten(
         "aten/src/ATen/MetaFunctions.h",
         "aten/src/ATen/MetaFunctions_inl.h",
         "aten/src/ATen/MethodOperators.h",
+        "aten/src/ATen/NativeMetaFunctions.h",
+        "aten/src/ATen/RegistrationDeclarations.h",
         "aten/src/ATen/core/TensorBody.h",
         "aten/src/ATen/core/TensorMethods.cpp",
         "aten/src/ATen/core/ATenOpList.cpp",
@@ -89,6 +90,23 @@ generate_aten(
     generator=":gen",
 )

+# this hack is due to https://github.com/bazelbuild/bazel/issues/281
+# since `outs` cannot be configured with if_cuda, we rerun the same command and declare cuda related files separately here.
+genrule(
+    name = "generated_cuda_cpp",
+    srcs = aten_generation_srcs,
+    outs = [
+        "aten/src/ATen/CUDAFunctions.h",
+        "aten/src/ATen/CUDAFunctions_inl.h",
+        "aten/src/ATen/RegisterCUDA.cpp",
+        "aten/src/ATen/RegisterQuantizedCUDA.cpp",
+        "aten/src/ATen/RegisterSparseCUDA.cpp",
+        "aten/src/ATen/RegisterSparseCsrCUDA.cpp",
+    ],
+    cmd = "$(location :gen) --source-path `dirname $(location aten/src/ATen/native/native_functions.yaml)`/.. --install_dir `dirname $(location aten/src/ATen/RegisterCUDA.cpp)`",
+    tools = [":gen"],
+)
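Because the CUDA-only outputs are declared by this second genrule, nothing in the default CPU build references them; they are consumed only by CUDA-specific targets. The real wiring is the `aten_cuda_cpp` change further down in this file, roughly of this shape (trimmed to the relevant attributes):

    cc_library(
        name = "aten_cuda_cpp",
        # Generated CUDA registration sources are compiled only in this
        # CUDA-specific library, never in the CPU-only targets.
        srcs = [":aten_cuda_cpp_srcs", ":generated_cuda_cpp"],
    )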
 py_library(
     name = "tools_codegen",
     srcs = glob(["tools/codegen/**/*.py"]),
@@ -230,7 +248,7 @@ filegroup(
 filegroup(
     name = "aten_native_mkl_cpp",
-    srcs = glob(["aten/src/ATen/native/mkl/*.cpp"]),
+    srcs = glob(["aten/src/ATen/native/mkl/*.cpp", "aten/src/ATen/mkl/*.cpp"]),
 )

 filegroup(
@@ -266,135 +284,40 @@
 )

 filegroup(
-    name = "aten_cuda_srcs",
-    srcs = [
-        "aten/src/ATen/cuda/CUDABlas.cpp",
-        "aten/src/ATen/cuda/CUDASolver.cpp",
-        "aten/src/ATen/cuda/CUDAContext.cpp",
-        "aten/src/ATen/cuda/CUDAGeneratorImpl.cpp",
-        "aten/src/ATen/cuda/CUDAGraph.cpp",
-        "aten/src/ATen/cuda/CuSparseHandlePool.cpp",
-        "aten/src/ATen/cuda/CublasHandlePool.cpp",
-        "aten/src/ATen/cuda/CusolverDnHandlePool.cpp",
-        "aten/src/ATen/cuda/PinnedMemoryAllocator.cpp",
-        "aten/src/ATen/cuda/detail/CUDAHooks.cpp",
-        "aten/src/ATen/cudnn/AutocastRNN.cpp",
-        "aten/src/ATen/cudnn/Descriptors.cpp",
-        "aten/src/ATen/cudnn/Handle.cpp",
-        "aten/src/ATen/cudnn/Types.cpp",
-        "aten/src/ATen/native/cuda/CUDAUnaryOps.cpp",
-        "aten/src/ATen/native/cuda/TensorShapeCUDA.cpp",
-        "aten/src/ATen/native/cudnn/AffineGridGenerator.cpp",
-        "aten/src/ATen/native/cudnn/BatchNorm.cpp",
-        "aten/src/ATen/native/cudnn/Conv.cpp",
-        "aten/src/ATen/native/cudnn/GridSampler.cpp",
-        "aten/src/ATen/native/cudnn/LossCTC.cpp",
-        "aten/src/ATen/native/cudnn/RNN.cpp",
-        "aten/src/ATen/native/miopen/BatchNorm_miopen.cpp",
-        "aten/src/ATen/native/miopen/Conv_miopen.cpp",
-        "aten/src/ATen/native/miopen/RNN_miopen.cpp",
-        "aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cpp",
-        "aten/src/ATen/native/sparse/cuda/SparseBlas.cpp",
-        "aten/src/ATen/native/sparse/cuda/SparseBlasImpl.cpp",
-    ],
+    name = "aten_cuda_cpp_srcs",
+    srcs = glob(
+        [
+            "aten/src/ATen/cuda/*.cpp",
+            "aten/src/ATen/cuda/detail/*.cpp",
+            "aten/src/ATen/cudnn/*.cpp",
+            "aten/src/ATen/native/cuda/*.cpp",
+            "aten/src/ATen/native/cudnn/*.cpp",
+            "aten/src/ATen/native/miopen/*.cpp",
+            "aten/src/ATen/native/sparse/cuda/*.cpp",
+            "aten/src/THC/*.cpp",
+        ],
+    ),
 )
filegroup( filegroup(
name = "aten_srcs_cu", name = "aten_cu_srcs",
srcs = [ srcs = glob([
"aten/src/ATen/cuda/cub.cu.cc", "aten/src/ATen/cuda/*.cu",
"aten/src/ATen/cuda/detail/IndexUtils.cu.cc", "aten/src/ATen/cuda/detail/*.cu",
"aten/src/ATen/cuda/detail/CUDAGraphsUtils.cu.cc", "aten/src/ATen/native/cuda/*.cu",
"aten/src/ATen/native/cuda/Activation.cu.cc", "aten/src/ATen/native/quantized/cuda/*.cu",
"aten/src/ATen/native/cuda/AdaptiveAveragePooling.cu.cc", "aten/src/ATen/native/sparse/cuda/*.cu",
"aten/src/ATen/native/cuda/AdaptiveAveragePooling3d.cu.cc", ]),
"aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu.cc",
"aten/src/ATen/native/cuda/AdaptiveMaxPooling3d.cu.cc",
"aten/src/ATen/native/cuda/AveragePool2d.cu.cc",
"aten/src/ATen/native/cuda/AveragePool3d.cu.cc",
"aten/src/ATen/native/cuda/BatchLinearAlgebra.cu.cc",
"aten/src/ATen/native/cuda/BatchLinearAlgebraLib.cu.cc",
"aten/src/ATen/native/cuda/BinaryArithmeticKernel.cu.cc",
"aten/src/ATen/native/cuda/BinaryCompareKernel.cu.cc",
"aten/src/ATen/native/cuda/BinaryMiscOpsKernels.cu.cc",
"aten/src/ATen/native/cuda/CUDAScalar.cu.cc",
"aten/src/ATen/native/cuda/Col2Im.cu.cc",
"aten/src/ATen/native/cuda/Copy.cu.cc",
"aten/src/ATen/native/cuda/CrossKernel.cu.cc",
"aten/src/ATen/native/cuda/DilatedMaxPool2d.cu.cc",
"aten/src/ATen/native/cuda/DilatedMaxPool3d.cu.cc",
"aten/src/ATen/native/cuda/DistanceKernel.cu.cc",
"aten/src/ATen/native/cuda/Distributions.cu.cc",
"aten/src/ATen/native/cuda/Dropout.cu.cc",
"aten/src/ATen/native/cuda/Embedding.cu.cc",
"aten/src/ATen/native/cuda/EmbeddingBackwardKernel.cu.cc",
"aten/src/ATen/native/cuda/EmbeddingBag.cu.cc",
"aten/src/ATen/native/cuda/FillKernel.cu.cc",
"aten/src/ATen/native/cuda/FractionalMaxPool2d.cu.cc",
"aten/src/ATen/native/cuda/FractionalMaxPool3d.cu.cc",
"aten/src/ATen/native/cuda/GridSampler.cu.cc",
"aten/src/ATen/native/cuda/Im2Col.cu.cc",
"aten/src/ATen/native/cuda/IndexKernel.cu.cc",
"aten/src/ATen/native/cuda/Indexing.cu.cc",
"aten/src/ATen/native/cuda/Lerp.cu.cc",
"aten/src/ATen/native/cuda/LinearAlgebra.cu.cc",
"aten/src/ATen/native/cuda/Loss.cu.cc",
"aten/src/ATen/native/cuda/LossCTC.cu.cc",
"aten/src/ATen/native/cuda/MaxUnpooling.cu.cc",
"aten/src/ATen/native/cuda/MultinomialKernel.cu.cc",
"aten/src/ATen/native/cuda/MultiLabelMarginCriterion.cu.cc",
"aten/src/ATen/native/cuda/NaiveConvolutionTranspose2d.cu.cc",
"aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu.cc",
"aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu.cc",
"aten/src/ATen/native/cuda/NLLLoss2d.cu.cc",
"aten/src/ATen/native/cuda/Normalization.cu.cc",
"aten/src/ATen/native/cuda/PointwiseOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/PowKernel.cu.cc",
"aten/src/ATen/native/cuda/RNN.cu.cc",
"aten/src/ATen/native/cuda/RangeFactories.cu.cc",
"aten/src/ATen/native/cuda/Reduce.cu.cc",
"aten/src/ATen/native/cuda/ReduceOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/ReflectionPad.cu.cc",
"aten/src/ATen/native/cuda/Repeat.cu.cc",
"aten/src/ATen/native/cuda/ReplicationPadding.cu.cc",
"aten/src/ATen/native/cuda/Resize.cu.cc",
"aten/src/ATen/native/cuda/SegmentReduce.cu.cc",
"aten/src/ATen/native/cuda/SoftMax.cu.cc",
"aten/src/ATen/native/cuda/SortingKthValue.cu.cc",
"aten/src/ATen/native/cuda/SparseMM.cu.cc",
"aten/src/ATen/native/cuda/SpectralOps.cu.cc",
"aten/src/ATen/native/cuda/SummaryOps.cu.cc",
"aten/src/ATen/native/cuda/TensorCompare.cu.cc",
"aten/src/ATen/native/cuda/TensorFactories.cu.cc",
"aten/src/ATen/native/cuda/TensorTopK.cu.cc",
"aten/src/ATen/native/cuda/TensorTransformations.cu.cc",
"aten/src/ATen/native/cuda/TriangularOps.cu.cc",
"aten/src/ATen/native/cuda/UnaryOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/UnarySpecialOpsKernel.cu.cc",
"aten/src/ATen/native/cuda/Unique.cu.cc",
"aten/src/ATen/native/cuda/UpSampleBicubic2d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleBilinear2d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleLinear1d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleNearest1d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleNearest2d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleNearest3d.cu.cc",
"aten/src/ATen/native/cuda/UpSampleTrilinear3d.cu.cc",
"aten/src/ATen/native/cuda/WeightNorm.cu.cc",
"aten/src/ATen/native/cuda/layer_norm_kernel.cu.cc",
"aten/src/ATen/native/quantized/cuda/fake_quantize_core.cu.cc",
"aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cu.cc",
"aten/src/ATen/native/sparse/cuda/SparseCUDATensor.cu.cc",
"aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu.cc",
],
) )
 header_template_rule(
     name = "aten_src_ATen_config",
     src = "aten/src/ATen/Config.h.in",
     out = "aten/src/ATen/Config.h",
+    include = "aten/src",
     substitutions = {
         "@AT_MKLDNN_ENABLED@": "1",
-        "@AT_MKL_ENABLED@": "0",
+        "@AT_MKL_ENABLED@": "1",
         "@AT_FFTW_ENABLED@": "0",
         "@AT_POCKETFFT_ENABLED@": "0",
         "@AT_NNPACK_ENABLED@": "0",
@@ -413,6 +336,7 @@ header_template_rule(
     name = "aten_src_ATen_cuda_config",
     src = "aten/src/ATen/cuda/CUDAConfig.h.in",
     out = "aten/src/ATen/cuda/CUDAConfig.h",
+    include = "aten/src",
     substitutions = {
         "@AT_CUDNN_ENABLED@": "1",
         "@AT_ROCM_ENABLED@": "0",
@@ -429,18 +353,19 @@ cc_library(
     ] + glob([
         "aten/src/**/*.h",
         "aten/src/**/*.hpp",
+        "aten/src/ATen/cuda/**/*.cuh",
+        "aten/src/ATen/native/**/*.cuh",
         "aten/src/TH/**/*.cpp",
         "aten/src/THC/*.cuh",
+        "aten/src/THC/generic/*.cu",
     ],
-    exclude = [
-        "aten/src/ATen/Config.h",
-    ],) + [
-        ":generated_cpp",
+    ) + [
         ":aten_src_ATen_config",
+        ":generated_cpp",
+        ":generated_cuda_cpp",
     ],
     includes = [
         "aten/src",
-        "aten/src/TH",
     ],
     deps = [
         "//c10:headers",
@@ -464,6 +389,7 @@ intern_build_aten_ops(
         ":aten_headers",
         "@sleef",
         "@fbgemm",
+        "@mkl",
     ],
 )
@@ -530,12 +456,17 @@ cc_binary(
 cc_library(
     name = "aten_cuda_cpp",
-    srcs = [":aten_cuda_srcs"],
+    srcs = [
+        ":aten_cuda_cpp_srcs",
+        ":generated_cuda_cpp",
+    ],
+    hdrs = [":aten_src_ATen_cuda_config"],
     copts = ATEN_COPTS,
     visibility = ["//visibility:public"],
     deps = [
         ":aten",
         "@cuda",
+        "@cuda//:cusolver",
         "@cuda//:nvrtc",
         "@cudnn",
     ],
@@ -552,9 +483,7 @@ torch_cuda_half_options = [
 cu_library(
     name = "aten_cuda",
-    srcs = [
-        ":aten_srcs_cu",
-    ],
+    srcs = [":aten_cu_srcs"],
     copts = ATEN_COPTS + torch_cuda_half_options,
     visibility = ["//visibility:public"],
     deps = [
@@ -618,6 +547,7 @@ header_template_rule(
 filegroup(
     name = "caffe2_contrib_srcs",
     srcs = [
+        "caffe2/contrib/aten/aten_op.cc",
         "caffe2/contrib/gloo/allgather_ops.cc",
         "caffe2/contrib/gloo/allreduce_ops.cc",
         "caffe2/contrib/gloo/barrier_ops.cc",
@@ -787,6 +717,7 @@ filegroup(
         "caffe2/operators/conv_op_eigen.cc",
         "caffe2/operators/conv_op_shared.cc",
         "caffe2/operators/conv_transpose_gradient_op.cc",
+        "caffe2/operators/conv_transpose_op.cc",
         "caffe2/operators/conv_transpose_op_mobile.cc",
         "caffe2/operators/copy_op.cc",
         "caffe2/operators/copy_rows_to_tensor_op.cc",
@@ -1182,7 +1113,7 @@ filegroup(
 )

 filegroup(
-    name = "caffe2_cuda_srcs",
+    name = "caffe2_cuda_cpp_srcs",
     srcs = [
         "caffe2/contrib/aten/aten_op_gpu.cc",
         "caffe2/contrib/gloo/allreduce_ops_gpu.cc",
@ -1251,155 +1182,155 @@ filegroup(
filegroup( filegroup(
name = "caffe2_cu_srcs", name = "caffe2_cu_srcs",
srcs = [ srcs = [
"caffe2/core/context_gpu.cu.cc", "caffe2/core/context_gpu.cu",
"caffe2/operators/abs_op.cu.cc", "caffe2/operators/abs_op.cu",
"caffe2/operators/accumulate_op.cu.cc", "caffe2/operators/accumulate_op.cu",
"caffe2/operators/accuracy_op.cu.cc", "caffe2/operators/accuracy_op.cu",
"caffe2/operators/acos_op.cu.cc", "caffe2/operators/acos_op.cu",
"caffe2/operators/affine_channel_op.cu.cc", "caffe2/operators/affine_channel_op.cu",
"caffe2/operators/alias_with_name.cu.cc", "caffe2/operators/alias_with_name.cu",
"caffe2/operators/arg_ops.cu.cc", "caffe2/operators/arg_ops.cu",
"caffe2/operators/asin_op.cu.cc", "caffe2/operators/asin_op.cu",
"caffe2/operators/assert_op.cu.cc", "caffe2/operators/assert_op.cu",
"caffe2/operators/atan_op.cu.cc", "caffe2/operators/atan_op.cu",
"caffe2/operators/batch_gather_ops.cu.cc", "caffe2/operators/batch_gather_ops.cu",
"caffe2/operators/batch_matmul_op.cu.cc", "caffe2/operators/batch_matmul_op.cu",
"caffe2/operators/batch_moments_op.cu.cc", "caffe2/operators/batch_moments_op.cu",
"caffe2/operators/batch_permutation_op.cu.cc", "caffe2/operators/batch_permutation_op.cu",
"caffe2/operators/batch_sparse_to_dense_op.cu.cc", "caffe2/operators/batch_sparse_to_dense_op.cu",
"caffe2/operators/boolean_mask_ops.cu.cc", "caffe2/operators/boolean_mask_ops.cu",
"caffe2/operators/boolean_unmask_ops.cu.cc", "caffe2/operators/boolean_unmask_ops.cu",
"caffe2/operators/bucketize_op.cu.cc", "caffe2/operators/bucketize_op.cu",
"caffe2/operators/cast_op.cu.cc", "caffe2/operators/cast_op.cu",
"caffe2/operators/cbrt_op.cu.cc", "caffe2/operators/cbrt_op.cu",
"caffe2/operators/ceil_op.cu.cc", "caffe2/operators/ceil_op.cu",
"caffe2/operators/channel_backprop_stats_op.cu.cc", "caffe2/operators/channel_backprop_stats_op.cu",
"caffe2/operators/channel_shuffle_op.cu.cc", "caffe2/operators/channel_shuffle_op.cu",
"caffe2/operators/channel_stats_op.cu.cc", "caffe2/operators/channel_stats_op.cu",
"caffe2/operators/channelwise_conv3d_op_cudnn.cu.cc", "caffe2/operators/channelwise_conv3d_op_cudnn.cu",
"caffe2/operators/clip_op.cu.cc", "caffe2/operators/clip_op.cu",
"caffe2/operators/copy_op.cu.cc", "caffe2/operators/copy_op.cu",
"caffe2/operators/cos_op.cu.cc", "caffe2/operators/cos_op.cu",
"caffe2/operators/cosh_op.cu.cc", "caffe2/operators/cosh_op.cu",
"caffe2/operators/cosine_embedding_criterion_op.cu.cc", "caffe2/operators/cosine_embedding_criterion_op.cu",
"caffe2/operators/cross_entropy_op.cu.cc", "caffe2/operators/cross_entropy_op.cu",
"caffe2/operators/cube_op.cu.cc", "caffe2/operators/cube_op.cu",
"caffe2/operators/data_couple_gpu.cu.cc", "caffe2/operators/data_couple_gpu.cu",
"caffe2/operators/deform_conv_op.cu.cc", "caffe2/operators/deform_conv_op.cu",
"caffe2/operators/depthwise_3x3_conv_op_cudnn.cu.cc", "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu",
"caffe2/operators/distance_op.cu.cc", "caffe2/operators/distance_op.cu",
"caffe2/operators/dropout_op.cu.cc", "caffe2/operators/dropout_op.cu",
"caffe2/operators/elementwise_div_op.cu.cc", "caffe2/operators/elementwise_div_op.cu",
"caffe2/operators/elementwise_linear_op.cu.cc", "caffe2/operators/elementwise_linear_op.cu",
"caffe2/operators/elementwise_mul_op.cu.cc", "caffe2/operators/elementwise_mul_op.cu",
"caffe2/operators/elementwise_ops.cu.cc", "caffe2/operators/elementwise_ops.cu",
"caffe2/operators/elu_op.cu.cc", "caffe2/operators/elu_op.cu",
"caffe2/operators/enforce_finite_op.cu.cc", "caffe2/operators/enforce_finite_op.cu",
"caffe2/operators/ensure_cpu_output_op.cu.cc", "caffe2/operators/ensure_cpu_output_op.cu",
"caffe2/operators/erf_op.cu.cc", "caffe2/operators/erf_op.cu",
"caffe2/operators/filler_op.cu.cc", "caffe2/operators/filler_op.cu",
"caffe2/operators/find_op.cu.cc", "caffe2/operators/find_op.cu",
"caffe2/operators/floor_op.cu.cc", "caffe2/operators/floor_op.cu",
"caffe2/operators/gather_op.cu.cc", "caffe2/operators/gather_op.cu",
"caffe2/operators/gelu_op.cu.cc", "caffe2/operators/gelu_op.cu",
"caffe2/operators/generate_proposals_op.cu.cc", "caffe2/operators/generate_proposals_op.cu",
"caffe2/operators/generate_proposals_op_util_nms_gpu.cu.cc", "caffe2/operators/generate_proposals_op_util_nms_gpu.cu",
"caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu.cc", "caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.cu",
"caffe2/operators/given_tensor_fill_op.cu.cc", "caffe2/operators/given_tensor_fill_op.cu",
"caffe2/operators/glu_op.cu.cc", "caffe2/operators/glu_op.cu",
"caffe2/operators/group_norm_op.cu.cc", "caffe2/operators/group_norm_op.cu",
"caffe2/operators/gru_unit_op_gpu.cu.cc", "caffe2/operators/gru_unit_op_gpu.cu",
"caffe2/operators/half_float_ops.cu.cc", "caffe2/operators/half_float_ops.cu",
"caffe2/operators/hard_sigmoid_op.cu.cc", "caffe2/operators/hard_sigmoid_op.cu",
"caffe2/operators/instance_norm_op.cu.cc", "caffe2/operators/instance_norm_op.cu",
"caffe2/operators/integral_image_op.cu.cc", "caffe2/operators/integral_image_op.cu",
"caffe2/operators/layer_norm_op.cu.cc", "caffe2/operators/layer_norm_op.cu",
"caffe2/operators/leaky_relu_op.cu.cc", "caffe2/operators/leaky_relu_op.cu",
"caffe2/operators/lengths_pad_op.cu.cc", "caffe2/operators/lengths_pad_op.cu",
"caffe2/operators/lengths_tile_op.cu.cc", "caffe2/operators/lengths_tile_op.cu",
"caffe2/operators/local_response_normalization_op.cu.cc", "caffe2/operators/local_response_normalization_op.cu",
"caffe2/operators/logit_op.cu.cc", "caffe2/operators/logit_op.cu",
"caffe2/operators/loss_op.cu.cc", "caffe2/operators/loss_op.cu",
"caffe2/operators/lp_pool_op.cu.cc", "caffe2/operators/lp_pool_op.cu",
"caffe2/operators/lstm_unit_op_gpu.cu.cc", "caffe2/operators/lstm_unit_op_gpu.cu",
"caffe2/operators/margin_ranking_criterion_op.cu.cc", "caffe2/operators/margin_ranking_criterion_op.cu",
"caffe2/operators/max_pool_with_index.cu.cc", "caffe2/operators/max_pool_with_index.cu",
"caffe2/operators/mean_op.cu.cc", "caffe2/operators/mean_op.cu",
"caffe2/operators/mem_query_op.cu.cc", "caffe2/operators/mem_query_op.cu",
"caffe2/operators/minmax_ops.cu.cc", "caffe2/operators/minmax_ops.cu",
"caffe2/operators/moments_op.cu.cc", "caffe2/operators/moments_op.cu",
"caffe2/operators/multi_class_accuracy_op.cu.cc", "caffe2/operators/multi_class_accuracy_op.cu",
"caffe2/operators/normalize_ops.cu.cc", "caffe2/operators/normalize_ops.cu",
"caffe2/operators/one_hot_ops.cu.cc", "caffe2/operators/one_hot_ops.cu",
"caffe2/operators/pack_segments.cu.cc", "caffe2/operators/pack_segments.cu",
"caffe2/operators/pad_op_gpu.cu.cc", "caffe2/operators/pad_op_gpu.cu",
"caffe2/operators/perplexity_op.cu.cc", "caffe2/operators/perplexity_op.cu",
"caffe2/operators/piecewise_linear_transform_op.cu.cc", "caffe2/operators/piecewise_linear_transform_op.cu",
"caffe2/operators/pool_op.cu.cc", "caffe2/operators/pool_op.cu",
"caffe2/operators/pow_op.cu.cc", "caffe2/operators/pow_op.cu",
"caffe2/operators/prelu_op.cu.cc", "caffe2/operators/prelu_op.cu",
"caffe2/operators/reciprocal_op.cu.cc", "caffe2/operators/reciprocal_op.cu",
"caffe2/operators/reduce_front_back_max_ops.cu.cc", "caffe2/operators/reduce_front_back_max_ops.cu",
"caffe2/operators/reduce_front_back_sum_mean_ops.cu.cc", "caffe2/operators/reduce_front_back_sum_mean_ops.cu",
"caffe2/operators/reduce_ops.cu.cc", "caffe2/operators/reduce_ops.cu",
"caffe2/operators/reduction_ops.cu.cc", "caffe2/operators/reduction_ops.cu",
"caffe2/operators/relu_n_op.cu.cc", "caffe2/operators/relu_n_op.cu",
"caffe2/operators/relu_op.cu.cc", "caffe2/operators/relu_op.cu",
"caffe2/operators/replace_nan_op.cu.cc", "caffe2/operators/replace_nan_op.cu",
"caffe2/operators/resize_3d_op.cu.cc", "caffe2/operators/resize_3d_op.cu",
"caffe2/operators/resize_op.cu.cc", "caffe2/operators/resize_op.cu",
"caffe2/operators/reverse_packed_segs_op.cu.cc", "caffe2/operators/reverse_packed_segs_op.cu",
"caffe2/operators/rmac_regions_op.cu.cc", "caffe2/operators/rmac_regions_op.cu",
"caffe2/operators/rnn/recurrent_network_op_gpu.cu.cc", "caffe2/operators/rnn/recurrent_network_op_gpu.cu",
"caffe2/operators/roi_align_gradient_op.cu.cc", "caffe2/operators/roi_align_gradient_op.cu",
"caffe2/operators/roi_align_op.cu.cc", "caffe2/operators/roi_align_op.cu",
"caffe2/operators/roi_align_rotated_gradient_op.cu.cc", "caffe2/operators/roi_align_rotated_gradient_op.cu",
"caffe2/operators/roi_align_rotated_op.cu.cc", "caffe2/operators/roi_align_rotated_op.cu",
"caffe2/operators/roi_pool_op.cu.cc", "caffe2/operators/roi_pool_op.cu",
"caffe2/operators/rsqrt_op.cu.cc", "caffe2/operators/rsqrt_op.cu",
"caffe2/operators/scale_blobs_op.cu.cc", "caffe2/operators/scale_blobs_op.cu",
"caffe2/operators/segment_reduction_op_gpu.cu.cc", "caffe2/operators/segment_reduction_op_gpu.cu",
"caffe2/operators/selu_op.cu.cc", "caffe2/operators/selu_op.cu",
"caffe2/operators/sequence_ops.cu.cc", "caffe2/operators/sequence_ops.cu",
"caffe2/operators/sigmoid_op.cu.cc", "caffe2/operators/sigmoid_op.cu",
"caffe2/operators/sin_op.cu.cc", "caffe2/operators/sin_op.cu",
"caffe2/operators/sinh_op.cu.cc", "caffe2/operators/sinh_op.cu",
"caffe2/operators/slice_op.cu.cc", "caffe2/operators/slice_op.cu",
"caffe2/operators/softmax_ops.cu.cc", "caffe2/operators/softmax_ops.cu",
"caffe2/operators/softplus_op.cu.cc", "caffe2/operators/softplus_op.cu",
"caffe2/operators/softsign_op.cu.cc", "caffe2/operators/softsign_op.cu",
"caffe2/operators/space_batch_op_gpu.cu.cc", "caffe2/operators/space_batch_op_gpu.cu",
"caffe2/operators/sparse_normalize_op_gpu.cu.cc", "caffe2/operators/sparse_normalize_op_gpu.cu",
"caffe2/operators/sparse_to_dense_op.cu.cc", "caffe2/operators/sparse_to_dense_op.cu",
"caffe2/operators/spatial_batch_norm_op.cu.cc", "caffe2/operators/spatial_batch_norm_op.cu",
"caffe2/operators/spatial_batch_norm_op_cudnn.cu.cc", "caffe2/operators/spatial_batch_norm_op_cudnn.cu",
"caffe2/operators/stump_func_op.cu.cc", "caffe2/operators/stump_func_op.cu",
"caffe2/operators/summarize_op.cu.cc", "caffe2/operators/summarize_op.cu",
"caffe2/operators/swish_op.cu.cc", "caffe2/operators/swish_op.cu",
"caffe2/operators/tan_op.cu.cc", "caffe2/operators/tan_op.cu",
"caffe2/operators/tanh_op.cu.cc", "caffe2/operators/tanh_op.cu",
"caffe2/operators/thresholded_relu_op.cu.cc", "caffe2/operators/thresholded_relu_op.cu",
"caffe2/operators/tile_op.cu.cc", "caffe2/operators/tile_op.cu",
"caffe2/operators/top_k.cu.cc", "caffe2/operators/top_k.cu",
"caffe2/operators/transpose_op.cu.cc", "caffe2/operators/transpose_op.cu",
"caffe2/operators/unique_ops.cu.cc", "caffe2/operators/unique_ops.cu",
"caffe2/operators/upsample_op.cu.cc", "caffe2/operators/upsample_op.cu",
"caffe2/operators/utility_ops.cu.cc", "caffe2/operators/utility_ops.cu",
"caffe2/operators/weighted_sample_op.cu.cc", "caffe2/operators/weighted_sample_op.cu",
"caffe2/sgd/adadelta_op_gpu.cu.cc", "caffe2/sgd/adadelta_op_gpu.cu",
"caffe2/sgd/adagrad_op_gpu.cu.cc", "caffe2/sgd/adagrad_op_gpu.cu",
"caffe2/sgd/adam_op_gpu.cu.cc", "caffe2/sgd/adam_op_gpu.cu",
"caffe2/sgd/fp16_momentum_sgd_op.cu.cc", "caffe2/sgd/fp16_momentum_sgd_op.cu",
"caffe2/sgd/fp32_momentum_sgd_op.cu.cc", "caffe2/sgd/fp32_momentum_sgd_op.cu",
"caffe2/sgd/lars_op_gpu.cu.cc", "caffe2/sgd/lars_op_gpu.cu",
"caffe2/sgd/momentum_sgd_op_gpu.cu.cc", "caffe2/sgd/momentum_sgd_op_gpu.cu",
"caffe2/sgd/rmsprop_op_gpu.cu.cc", "caffe2/sgd/rmsprop_op_gpu.cu",
"caffe2/sgd/yellowfin_op_gpu.cu.cc", "caffe2/sgd/yellowfin_op_gpu.cu",
"caffe2/utils/math/broadcast.cu.cc", "caffe2/utils/math/broadcast.cu",
"caffe2/utils/math/elementwise.cu.cc", "caffe2/utils/math/elementwise.cu",
"caffe2/utils/math/reduce.cu.cc", "caffe2/utils/math/reduce.cu",
"caffe2/utils/math/transpose.cu.cc", "caffe2/utils/math/transpose.cu",
"caffe2/utils/math_gpu.cu.cc", "caffe2/utils/math_gpu.cu",
], ],
) )
@@ -1432,6 +1363,29 @@ cc_library(
     ],
 )

+py_binary(
+    name = "gen_op",
+    srcs = ["caffe2/contrib/aten/gen_op.py"],
+    deps = [":tools_codegen"],
+)
+
+genrule(
+    name = "generated_caffe2_aten_op_headers",
+    srcs = [
+        "caffe2/contrib/aten/aten_op_template.h",
+        "aten/src/ATen/Declarations.yaml",
+    ],
+    outs = ["caffe2/caffe2/contrib/aten/gen_aten_op.h"],
+    cmd = """
+    $(location :gen_op) \
+    --output_prefix gen_ \
+    --install_dir $(@D) \
+    --aten_root `dirname $(location aten/src/ATen/Declarations.yaml)`/../.. \
+    --template_dir `dirname $(location caffe2/contrib/aten/aten_op_template.h)` \
+    --yaml_dir `dirname $(location aten/src/ATen/Declarations.yaml)`""",
+    tools = [":gen_op"],
+)
+
 cc_library(
     name = "caffe2_headers",
     hdrs = glob([
@@ -1472,7 +1426,7 @@ cc_library(
     ]) + if_cuda(glob([
         "caffe2/**/*.cuh",
         "caffe2/image/*.h",
-    ])),
+    ])) + [":generated_caffe2_aten_op_headers"],
     copts = CAFFE2_COPTS,
     includes = [
         "caffe2/contrib/aten",
@@ -1554,7 +1508,7 @@ cc_library(
         "@fmt",
     ] + if_cuda(
         [
-            ":caffe2_cpp_cuda",
+            ":caffe2_cuda_cpp",
             ":aten_cuda",
             "@tensorpipe//:tensorpipe_cuda",
         ],
@@ -1567,8 +1521,8 @@ cc_library(
 )

 cc_library(
-    name = "caffe2_cpp_cuda",
-    srcs = [":caffe2_cuda_srcs"],
+    name = "caffe2_cuda_cpp",
+    srcs = [":caffe2_cuda_cpp_srcs"],
     copts = CAFFE2_COPTS,
     visibility = ["//visibility:public"],
     deps = [
@@ -1586,7 +1540,6 @@ cu_library(
     deps = [
         ":aten",
         ":caffe2_headers",
-        "@cub",
         "@cuda//:cublas",
         "@cuda//:curand",
         "@cudnn",
@@ -1610,6 +1563,7 @@ PERF_COPTS = [
     "-DHAVE_SHM_OPEN=1",
     "-DHAVE_SHM_UNLINK=1",
     "-DSLEEF_STATIC_LIBS=1",
+    "-DTH_BALS_MKL",
     "-D_FILE_OFFSET_BITS=64",
     "-DUSE_FBGEMM",
     "-fvisibility-inlines-hidden",
@@ -1693,10 +1647,29 @@ genrule(
     srcs = ["torch/csrc/api/include/torch/version.h.in", "version.txt"],
     outs = ["torch/csrc/api/include/torch/version.h"],
     cmd = "$(location :gen_version_header) --template-path $(location torch/csrc/api/include/torch/version.h.in) --version-path $(location version.txt) --output-path $@",
-    tools = [':gen_version_header']
+    tools = [':gen_version_header'],
 )

-torch_cuda_headers = glob(["torch/csrc/cuda/*.h"])
+py_binary(
+    name = "stringify_file",
+    srcs = ["torch/csrc/jit/codegen/cuda/tools/stringify_file.py"],
+)
+
+generated_nvfuser_hdrs = ["generated_" + hdr for hdr in libtorch_nvfuser_generated_headers]
+
+[
+    genrule(
+        name = name,
+        srcs = [src],
+        outs = ["nvfuser_resources/{}".format(hdr)],
+        cmd = "$(location :stringify_file) -i $< -o $@",
+        tools = [":stringify_file"],
+    )
+    for name, src, hdr in zip(generated_nvfuser_hdrs, libtorch_nvfuser_runtime_sources, libtorch_nvfuser_generated_headers)
+]
+
+torch_cuda_headers = glob(["torch/csrc/cuda/*.h"]) + generated_nvfuser_hdrs
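As a concrete illustration, for one entry of the zipped lists the comprehension above expands to a single genrule of this form (the specific runtime source and header names come from build_variables.bzl, which is not shown here, so they are assumptions):

    genrule(
        name = "generated_tensor.h",  # assumed expansion of "generated_" + hdr
        srcs = ["torch/csrc/jit/codegen/cuda/runtime/tensor.cu"],  # assumed entry of libtorch_nvfuser_runtime_sources
        outs = ["nvfuser_resources/tensor.h"],  # "nvfuser_resources/" + assumed hdr
        cmd = "$(location :stringify_file) -i $< -o $@",
        tools = [":stringify_file"],
    )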
 cc_library(
     name = "torch_headers",
     hdrs = if_cuda(
@@ -1707,6 +1680,7 @@ cc_library(
         "torch/csrc/**/*.h",
         "torch/csrc/distributed/c10d/*.hpp",
         "torch/lib/libshm/*.h",
+        "torch/csrc/generic/*.cpp",
     ],
     exclude = [
         "torch/csrc/autograd/generated/VariableType.h",
@@ -1743,21 +1717,25 @@ TORCH_COPTS = COMMON_COPTS + [
     "-fno-trapping-math",
 ]

+cu_library(
+    name = "torch_distributed_cuda",
+    srcs = ["torch/csrc/distributed/c10d/quantization/quantization_gpu.cu"],
+    deps = [":torch_headers"],
+)
+
 cc_library(
     name = "torch",
     srcs = if_cuda(glob(
-        [
-            "torch/csrc/cuda/*.cpp",
-            "torch/csrc/autograd/functions/comm.cpp",
-        ],
+        libtorch_cuda_sources,
         exclude = [
             "torch/csrc/cuda/python_nccl.cpp",
             "torch/csrc/cuda/nccl.cpp",
+            "torch/csrc/distributed/c10d/quantization/quantization_gpu.cu",
         ],
     )) + libtorch_core_sources + libtorch_distributed_sources + torch_cpp_srcs + libtorch_extra_sources + jit_core_sources + [
         ":cpp_generated_code",
     ],
-    copts = TORCH_COPTS + if_cuda(["-DUSE_CUDA=1"]),
+    copts = TORCH_COPTS,
     defines = [
         "CAFFE2_NIGHTLY_VERSION=20200115",
     ],
@@ -1765,7 +1743,10 @@ cc_library(
     deps = [
         ":caffe2",
         ":torch_headers",
-    ],
+    ] + if_cuda([
+        ":torch_distributed_cuda",
+        "@cuda//:nvToolsExt",
+    ]),
     alwayslink = True,
 )
@@ -1783,10 +1764,9 @@ cc_library(
         "**/*.h",
         "**/*.cuh",
     ]) + [
-        ":generated_code",
+        ":cpp_generated_code",
     ],
     includes = [
-        ".",
         "torch/csrc/api/include",
         "torch/csrc/distributed",
         "torch/lib",
@@ -1794,21 +1774,17 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
-        ":aten_headers",
-        ":caffe2_headers",
-        "//c10:headers",
+        ":torch_headers",
     ],
 )

 cc_library(
     name = "torch_python",
     srcs = libtorch_python_core_sources + [":python_generated_code"],
-    hdrs = glob([
-        "torch/csrc/generic/*.cpp",
-    ]),
     deps = [
         ":torch",
         ":shm",
+        "@pybind11",
     ],
 )

@@ -1842,11 +1818,16 @@ cc_library(
 # Torch integration tests rely on a labeled data set from the MNIST database.
 # http://yann.lecun.com/exdb/mnist/

-cpp_api_tests = glob(["test/cpp/api/*.cpp"])
+# imethod.cpp is excluded since torch/csrc/deploy* build is not yet supported.
+cpp_api_tests = glob(
+    ["test/cpp/api/*.cpp"],
+    exclude = ["test/cpp/api/imethod.cpp"],
+)

 [
     cc_test(
         name = paths.split_extension(paths.basename(filename))[0].replace("-","_") + "_test",
         size = "medium",
         srcs = [filename],
         deps = [
             ":test_support",


@@ -1,7 +1,22 @@
 workspace(name = "pytorch")

 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
-load("//tools/rules:workspace.bzl", "new_patched_local_repository", "new_empty_repository")
+load("//tools/rules:workspace.bzl", "new_patched_local_repository")
+
+http_archive(
+    name = "rules_cuda",
+    sha256 = "f80438bee9906e9ecb1a8a4ae2365374ac1e8a283897281a2db2fb7fcf746333",
+    strip_prefix = "runtime-b1c7cce21ba4661c17ac72421c6a0e2015e7bef3/third_party/rules_cuda",
+    urls = ["https://github.com/tensorflow/runtime/archive/b1c7cce21ba4661c17ac72421c6a0e2015e7bef3.tar.gz"],
+)
+
+load("@rules_cuda//cuda:dependencies.bzl", "rules_cuda_dependencies")
+
+rules_cuda_dependencies()
+
+load("@rules_cc//cc:repositories.bzl", "rules_cc_toolchains")
+
+rules_cc_toolchains()

 http_archive(
     name = "bazel_skylib",
@@ -171,13 +186,14 @@ load("@rules_python//python:repositories.bzl", "py_repositories")
 py_repositories()

-local_repository(
-    name = "local_config_cuda",
-    path = "third_party/tensorflow_cuda_bazel_build",
+new_local_repository(
+    name = "cuda",
+    build_file = "@//third_party:cuda.BUILD",
+    path = "/usr/local/cuda",
 )

-# Wrapper to expose local_config_cuda in an agnostic way
-new_empty_repository(
-    name = "cuda",
-    build_file = "//third_party:cuda.BUILD",
+new_local_repository(
+    name = "cudnn",
+    build_file = "@//third_party:cudnn.BUILD",
+    path = "/usr/",
 )
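Both repositories point at fixed system paths; a machine with CUDA installed elsewhere would use the same pattern with a different `path` (hypothetical local override, not part of this PR):

    new_local_repository(
        name = "cuda",
        build_file = "@//third_party:cuda.BUILD",
        path = "/opt/cuda-11.3",  # assumed non-default install prefix
    )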

third_party/cuda.BUILD (vendored, 101 changed lines)

@ -1,43 +1,76 @@
""" # Adopted from: https://github.com/tensorflow/runtime/blob/master/third_party/rules_cuda/private/BUILD.local_cuda
Collect all the CUDA stuff from @local_config_cuda in a single target # Library targets are created corresponding to BUILD.bazel's needs.
for convenience.
""" cc_library(
name = "cuda_headers",
hdrs = glob([
"include/**",
"targets/x86_64-linux/include/**",
]),
includes = [
"include",
"targets/x86_64-linux/include",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "cuda_driver",
srcs = ["lib64/stubs/libcuda.so"],
visibility = ["//visibility:public"],
)
cc_library( cc_library(
name = "cuda", name = "cuda",
srcs = ["targets/x86_64-linux/lib/libcudart.so"],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [":cuda_headers"],
"@local_config_cuda//cuda:cublas",
"@local_config_cuda//cuda:cuda_driver",
"@local_config_cuda//cuda:cuda_headers",
"@local_config_cuda//cuda:cudart",
"@local_config_cuda//cuda:cufft",
"@local_config_cuda//cuda:curand",
],
) )
cc_library( cc_library(
name = "cupti", name = "cufft",
deps = [ srcs = ["targets/x86_64-linux/lib/libcufft.so"],
"@local_config_cuda//cuda:cupti_headers", visibility = ["//visibility:public"],
"@local_config_cuda//cuda:cupti_link",
],
) )
[ cc_library(
alias( name = "cublas",
name = lib, srcs = [
actual = "@local_config_cuda//cuda:{}".format(lib), "targets/x86_64-linux/lib/libcublasLt.so",
visibility = ["//visibility:public"], "targets/x86_64-linux/lib/libcublas.so",
) ],
for lib in [ visibility = ["//visibility:public"],
"cublas", )
"cufft",
"cusolver", cc_library(
"cusparse", name = "curand",
"curand", srcs = ["targets/x86_64-linux/lib/libcurand.so"],
"nvrtc", visibility = ["//visibility:public"],
"cuda_driver", )
"nvToolsExt",
] cc_library(
] name = "cusolver",
srcs = ["targets/x86_64-linux/lib/libcusolver.so"],
visibility = ["//visibility:public"],
)
cc_library(
name = "cusparse",
srcs = ["targets/x86_64-linux/lib/libcusparse.so"],
visibility = ["//visibility:public"],
)
cc_library(
name = "nvrtc",
srcs = [
"targets/x86_64-linux/lib/libnvrtc.so",
"targets/x86_64-linux/lib/libnvrtc-builtins.so",
],
visibility = ["//visibility:public"],
)
cc_library(
name = "nvToolsExt",
srcs = [ "lib64/libnvToolsExt.so"],
visibility = ["//visibility:public"],
)
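One detail worth calling out above: the cuda_driver target points at the stub libcuda.so under lib64/stubs rather than at a real driver library, which lets targets link against the CUDA driver API on build machines that have the toolkit installed but no GPU driver. A minimal sketch of a consumer (target and source names are hypothetical):

cc_binary(
    name = "driver_api_example",       # hypothetical example target
    srcs = ["driver_api_example.cc"],
    deps = [
        "@cuda//:cuda_driver",         # resolves cuInit/cuDeviceGet etc. at link time via the stub
        "@cuda//:cuda_headers",
    ],
)

At run time such a binary still needs the real libcuda.so shipped with the installed driver; the stub only satisfies the linker.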
third_party/cudnn.BUILD (new file)
View File
@ -0,0 +1,26 @@
# Adopted from: https://github.com/NVIDIA/TRTorch/blob/master/third_party/cudnn/local/BUILD
cc_library(
name = "cudnn_headers",
hdrs = ["include/cudnn.h"] + glob([
"include/cudnn+.h",
"include/cudnn_*.h",
]),
includes = ["include/"],
visibility = ["//visibility:private"],
)
cc_import(
name = "cudnn_lib",
shared_library = "lib/x86_64-linux-gnu/libcudnn.so",
visibility = ["//visibility:private"],
)
cc_library(
name = "cudnn",
visibility = ["//visibility:public"],
deps = [
"cudnn_headers",
"cudnn_lib",
],
)
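Since the hdrs and shared_library paths in this file are relative to the repository root, the cudnn repository could in principle be pointed at a different install prefix from the WORKSPACE. A hypothetical sketch (the /opt/cudnn path is illustrative and assumes the same include/ and lib/x86_64-linux-gnu/ layout):

new_local_repository(
    name = "cudnn",
    build_file = "@//third_party:cudnn.BUILD",
    path = "/opt/cudnn",   # hypothetical alternative prefix
)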
View File
@ -48,8 +48,8 @@ cc_library(
cu_library( cu_library(
name = "gloo_cuda", name = "gloo_cuda",
srcs = [ srcs = [
"gloo/cuda.cu.cc", "gloo/cuda.cu",
"gloo/cuda_private.cu.cc", "gloo/cuda_private.cu",
], ],
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
@ -72,8 +72,8 @@ cc_library(
"gloo/cuda*.cc", "gloo/cuda*.cc",
"gloo/common/win.cc", "gloo/common/win.cc",
"gloo/rendezvous/redis_store.cc", "gloo/rendezvous/redis_store.cc",
], ]
), ) + if_cuda(glob(["gloo/cuda*.cc"])),
copts = [ copts = [
"-std=gnu++11", "-std=gnu++11",
"-std=c++11", "-std=c++11",
View File
@ -58,7 +58,7 @@ def header_template_rule_impl(ctx):
CcInfo(compilation_context = cc_common.create_compilation_context( CcInfo(compilation_context = cc_common.create_compilation_context(
# pass out the include path for finding this header # pass out the include path for finding this header
includes = depset([ctx.outputs.out.dirname, ctx.bin_dir.path]), system_includes = depset([ctx.attr.include, ctx.outputs.out.dirname, ctx.bin_dir.path]),
# and the actual header here. # and the actual header here.
headers = depset([ctx.outputs.out]), headers = depset([ctx.outputs.out]),
@ -68,6 +68,7 @@ def header_template_rule_impl(ctx):
header_template_rule = rule( header_template_rule = rule(
attrs = { attrs = {
"out": attr.output(mandatory = True), "out": attr.output(mandatory = True),
"include": attr.string(),
"src": attr.label( "src": attr.label(
mandatory = True, mandatory = True,
allow_single_file = True, allow_single_file = True,
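The new include attribute feeds system_includes, so consumers of the generated header can resolve it by its intended include root rather than by a bin-dir-relative path (and, being system_includes, these directories are passed as -isystem). A hedged sketch of a call site; the names and the substitutions attribute are assumed for illustration and are not shown in this hunk:

header_template_rule(
    name = "example_config_header",            # hypothetical target
    src = "aten/src/ATen/Config.h.in",         # hypothetical template
    out = "aten/src/ATen/Config.h",
    include = "aten/src",                      # lets consumers write #include <ATen/Config.h>
    substitutions = {"@AT_EXAMPLE@": "1"},     # assumed attribute, not shown above
)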
View File
@ -1,5 +0,0 @@
# Config for CUDA
This is a checked-in copy of the auto-generated config for building CUDA code with bazel. The content of this folder was generated from https://github.com/tensorflow/tensorflow `./configure` execution and then edited manually to fit the pytorch needs.
The LICENSE for the TensorFlow project is APACHE 2. The full LICENSE file could be found here https://github.com/tensorflow/tensorflow/blob/master/LICENSE.
View File
@ -1 +0,0 @@
workspace(name = "local_config_cuda")
View File
@ -1,451 +0,0 @@
licenses([
"restricted",
"reciprocal",
"notice",
]) # MPL2, portions GPL v3, LGPL v3, BSD-like
package(default_visibility = ["//visibility:public"])
config_setting(
name = "using_nvcc",
values = {
"define": "using_cuda_nvcc=true",
},
)
config_setting(
name = "using_clang",
values = {
"define": "using_cuda_clang=true",
},
)
# Equivalent to using_clang && -c opt.
config_setting(
name = "using_clang_opt",
values = {
"define": "using_cuda_clang=true",
"compilation_mode": "opt",
},
)
config_setting(
name = "darwin",
values = {"cpu": "darwin"},
)
cc_library(
name = "cuda_headers",
hdrs = [
":cuda-include",
":cudnn-include",
],
includes = [
".",
"include",
],
)
cc_library(
name = "cudnn_headers",
hdrs = [
":cudnn-include",
],
includes = [
".",
"include",
],
)
cc_library(
name = "cudart_static",
linkopts = [
"-L/usr/local/cuda/lib64",
],
)
cc_library(
name = "cuda_driver",
linkopts = ["-lcuda"],
deps = [":linker_search_path"],
)
# Provides the RPATH for Nvidia-less sytems to be able to run binaries linked to libcuda.
cc_library(
name = "driver_stub_runtime",
linkopts = [
"-Wl,-rpath,/usr/local/cuda/lib64/stubs",
],
deps = [":cuda_driver"],
)
cc_library(
name = "linker_search_path",
linkopts = [
"-L/usr/local/cuda/lib64",
"-L/usr/local/cuda/lib64/stubs",
"-Wl,-rpath-link,/usr/local/cuda/lib64",
"-Wl,-rpath-link,/usr/local/cuda/lib64/stubs",
],
)
[
cc_library(
name = libname,
linkopts = ["-l" + libname] + (["-lgomp"] if (libname == "cusolver") else []),
linkstatic = True,
deps = [":linker_search_path"],
)
for libname in [
"cublas",
"cudart",
"cudnn",
"cufft",
"curand",
"cusolver",
"cusparse",
"nvrtc",
"nvToolsExt",
]
]
cc_library(
name = "cuda",
deps = [
":cublas",
":cuda_headers",
":cudart",
":cudnn",
":cufft",
":curand",
":nvToolsExt",
],
)
# NVIDIA Performance Primitives (http://docs.nvidia.com/cuda/npp/modules.html))
# used by OpenCV
cc_library(
name = "nppi",
linkopts = [
"-lnppc",
"-lnppial",
"-lnppicom",
"-lnppidei",
"-lnppif",
"-lnppig",
"-lnppim",
"-lnppist",
"-lnppitc",
"-lnpps",
],
linkstatic = True,
deps = [":linker_search_path"],
)
# NVIDIA Management Library
cc_library(
name = "nvml",
linkopts = [
"-lnvidia-ml",
"-Wl,-rpath,/usr/lib/nvidia-410",
"-Wl,-rpath,/usr/lib/nvidia-390",
"-Wl,-rpath,/usr/lib/nvidia-387",
"-Wl,-rpath,/usr/lib/nvidia-384",
],
deps = [":linker_search_path"],
)
cc_library(
name = "cupti_headers",
hdrs = [
":cuda-extras",
],
includes = [
".",
"extras/CUPTI/include/",
],
)
# cupti .so exposed at linktime
cc_library(
name = "cupti_link",
linkopts = [
"-L/usr/local/cuda/extras/CUPTI/lib64",
"-lcupti",
],
)
cc_library(
name = "libdevice_root",
data = [":cuda-nvvm"],
)
CUDA_INCLUDES_FILES = [
"include/builtin_types.h",
"include/channel_descriptor.h",
"include/CL/cl_egl.h",
"include/CL/cl_ext.h",
"include/CL/cl_gl_ext.h",
"include/CL/cl_gl.h",
"include/CL/cl.h",
"include/CL/cl.hpp",
"include/CL/cl_platform.h",
"include/CL/opencl.h",
"include/common_functions.h",
"include/cooperative_groups.h",
"include/cooperative_groups_helpers.h",
"include/crt/common_functions.h",
"include/crt/device_double_functions.h",
"include/crt/device_double_functions.hpp",
"include/crt/device_functions.h",
"include/crt/device_functions.hpp",
"include/crt/func_macro.h",
"include/crt/host_config.h",
"include/crt/host_defines.h",
"include/crt/host_runtime.h",
"include/crt/math_functions.h",
"include/crt/math_functions.hpp",
"include/crt/mma.h",
"include/crt/mma.hpp",
"include/crt/nvfunctional",
"include/crt/sm_70_rt.h",
"include/crt/sm_70_rt.hpp",
"include/crt/storage_class.h",
# TODO: figure out why on a CI machine with CUDA 10.2 it's not present
# "include/cublas_api.h",
# "include/cublas.h",
# "include/cublas_v2.h",
# "include/cublasXt.h",
"include/cuComplex.h",
"include/cuda_device_runtime_api.h",
"include/cudaEGL.h",
"include/cuda_egl_interop.h",
"include/cuda_fp16.h",
"include/cuda_fp16.hpp",
"include/cudaGL.h",
"include/cuda_gl_interop.h",
"include/cuda.h",
"include/cudalibxt.h",
"include/cuda_occupancy.h",
"include/cuda_profiler_api.h",
"include/cudaProfiler.h",
"include/cudart_platform.h",
"include/cuda_runtime_api.h",
"include/cuda_runtime.h",
"include/cuda_surface_types.h",
"include/cuda_texture_types.h",
"include/cudaVDPAU.h",
"include/cuda_vdpau_interop.h",
"include/cufft.h",
"include/cufftw.h",
"include/cufftXt.h",
"include/curand_discrete2.h",
"include/curand_discrete.h",
"include/curand_globals.h",
"include/curand.h",
"include/curand_kernel.h",
"include/curand_lognormal.h",
"include/curand_mrg32k3a.h",
"include/curand_mtgp32dc_p_11213.h",
"include/curand_mtgp32.h",
"include/curand_mtgp32_host.h",
"include/curand_mtgp32_kernel.h",
"include/curand_normal.h",
"include/curand_normal_static.h",
"include/curand_philox4x32_x.h",
"include/curand_poisson.h",
"include/curand_precalc.h",
"include/curand_uniform.h",
"include/cusolver_common.h",
"include/cusolverDn.h",
"include/cusolverRf.h",
"include/cusolverSp.h",
"include/cusolverSp_LOWLEVEL_PREVIEW.h",
"include/cusparse.h",
"include/cusparse_v2.h",
"include/device_atomic_functions.h",
"include/device_atomic_functions.hpp",
"include/device_double_functions.h",
"include/device_functions.h",
"include/device_launch_parameters.h",
"include/device_types.h",
"include/driver_functions.h",
"include/driver_types.h",
"include/fatBinaryCtl.h",
"include/fatbinary.h",
"include/host_config.h",
"include/host_defines.h",
"include/library_types.h",
"include/math_constants.h",
"include/math_functions.h",
"include/mma.h",
"include/nppcore.h",
"include/nppdefs.h",
"include/npp.h",
"include/nppi_arithmetic_and_logical_operations.h",
"include/nppi_color_conversion.h",
"include/nppi_compression_functions.h",
"include/nppi_computer_vision.h",
"include/nppi_data_exchange_and_initialization.h",
"include/nppi_filtering_functions.h",
"include/nppi_geometry_transforms.h",
"include/nppi.h",
"include/nppi_linear_transforms.h",
"include/nppi_morphological_operations.h",
"include/nppi_statistics_functions.h",
"include/nppi_support_functions.h",
"include/nppi_threshold_and_compare_operations.h",
"include/npps_arithmetic_and_logical_operations.h",
"include/npps_conversion_functions.h",
"include/npps_filtering_functions.h",
"include/npps.h",
"include/npps_initialization.h",
"include/npps_statistics_functions.h",
"include/npps_support_functions.h",
# Note: CUDA 10.0 only
# "include/nppversion.h",
# TODO: figure out why on a CI machine with CUDA 10.2 it's not present
# "include/nvblas.h",
"include/nvfunctional",
"include/nvgraph.h",
"include/nvjpeg.h",
"include/nvml.h",
"include/nvrtc.h",
"include/nvToolsExtCuda.h",
"include/nvToolsExtCudaRt.h",
"include/nvToolsExt.h",
"include/nvToolsExtMeta.h",
"include/nvToolsExtSync.h",
"include/nvtx3/nvToolsExtCuda.h",
"include/nvtx3/nvToolsExtCudaRt.h",
"include/nvtx3/nvToolsExt.h",
"include/nvtx3/nvToolsExtOpenCL.h",
"include/nvtx3/nvToolsExtSync.h",
"include/nvtx3/nvtxDetail/nvtxImplCore.h",
"include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h",
"include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h",
"include/nvtx3/nvtxDetail/nvtxImpl.h",
"include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h",
"include/nvtx3/nvtxDetail/nvtxImplSync_v3.h",
"include/nvtx3/nvtxDetail/nvtxInitDecls.h",
"include/nvtx3/nvtxDetail/nvtxInitDefs.h",
"include/nvtx3/nvtxDetail/nvtxInit.h",
"include/nvtx3/nvtxDetail/nvtxLinkOnce.h",
"include/nvtx3/nvtxDetail/nvtxTypes.h",
"include/sm_20_atomic_functions.h",
"include/sm_20_atomic_functions.hpp",
"include/sm_20_intrinsics.h",
"include/sm_20_intrinsics.hpp",
"include/sm_30_intrinsics.h",
"include/sm_30_intrinsics.hpp",
"include/sm_32_atomic_functions.h",
"include/sm_32_atomic_functions.hpp",
"include/sm_32_intrinsics.h",
"include/sm_32_intrinsics.hpp",
"include/sm_35_atomic_functions.h",
"include/sm_35_intrinsics.h",
"include/sm_60_atomic_functions.h",
"include/sm_60_atomic_functions.hpp",
"include/sm_61_intrinsics.h",
"include/sm_61_intrinsics.hpp",
# CUDA 10.0 only
# "include/sobol_direction_vectors.h",
"include/surface_functions.h",
"include/surface_functions.hpp",
"include/surface_indirect_functions.h",
"include/surface_indirect_functions.hpp",
"include/surface_types.h",
"include/texture_fetch_functions.h",
"include/texture_fetch_functions.hpp",
"include/texture_indirect_functions.h",
"include/texture_indirect_functions.hpp",
"include/texture_types.h",
"include/vector_functions.h",
"include/vector_functions.hpp",
"include/vector_types.h",
]
genrule(
name = "cuda-include",
outs = CUDA_INCLUDES_FILES,
cmd = " && ".join([
"ln -s /usr/local/cuda/{relpath} $(@D)/{relpath}".format(relpath = p)
for p in CUDA_INCLUDES_FILES
]),
local = True,
tags = ["no-cache"],
)
CUDA_NVVM_FILES = [
"nvvm/bin/cicc",
"nvvm/include/nvvm.h",
"nvvm/lib64/libnvvm.so",
"nvvm/lib64/libnvvm.so.3",
"nvvm/lib64/libnvvm.so.3.3.0",
"nvvm/libdevice/libdevice.10.bc",
]
genrule(
name = "cuda-nvvm",
outs = CUDA_NVVM_FILES,
cmd = " && ".join([
"ln -s /usr/local/cuda/{relpath} $(@D)/{relpath}".format(relpath = p)
for p in CUDA_NVVM_FILES
]),
local = True,
tags = ["no-cache"],
)
CUDA_EXTRAS_FILES = [
"extras/CUPTI/include/cuda_stdint.h",
"extras/CUPTI/include/cupti.h",
"extras/CUPTI/include/cupti_activity.h",
"extras/CUPTI/include/cupti_callbacks.h",
"extras/CUPTI/include/cupti_driver_cbid.h",
"extras/CUPTI/include/cupti_events.h",
"extras/CUPTI/include/cupti_metrics.h",
"extras/CUPTI/include/cupti_nvtx_cbid.h",
"extras/CUPTI/include/cupti_result.h",
"extras/CUPTI/include/cupti_runtime_cbid.h",
"extras/CUPTI/include/cupti_version.h",
"extras/CUPTI/include/generated_cuda_gl_interop_meta.h",
"extras/CUPTI/include/generated_cuda_meta.h",
"extras/CUPTI/include/generated_cuda_runtime_api_meta.h",
"extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h",
"extras/CUPTI/include/generated_cudaGL_meta.h",
"extras/CUPTI/include/generated_cudaVDPAU_meta.h",
"extras/CUPTI/include/generated_nvtx_meta.h",
"extras/CUPTI/include/GL/gl.h",
"extras/CUPTI/include/GL/glew.h",
"extras/CUPTI/include/GL/glext.h",
"extras/CUPTI/include/GL/glu.h",
"extras/CUPTI/include/GL/glut.h",
"extras/CUPTI/include/GL/glx.h",
"extras/CUPTI/include/GL/glxext.h",
"extras/CUPTI/include/GL/wglew.h",
"extras/CUPTI/include/GL/wglext.h",
"extras/CUPTI/include/openacc/cupti_openacc.h",
]
genrule(
name = "cuda-extras",
outs = CUDA_EXTRAS_FILES,
cmd = " && ".join([
"ln -s /usr/local/cuda/{relpath} $(@D)/{relpath}".format(relpath = p)
for p in CUDA_EXTRAS_FILES
]),
local = True,
tags = ["no-cache"],
)
genrule(
name = "cudnn-include",
outs = [
"include/cudnn.h",
],
cmd = """
ln -s /usr/include/cudnn.h $(@D)/cudnn.h""",
local = True,
tags = ["no-cache"],
)
View File
@ -162,8 +162,8 @@ cc_library(
cc_library( cc_library(
name = "tensorpipe_cuda", name = "tensorpipe_cuda",
srcs = TENSORPIPE_CUDA_SOURCES, srcs = glob(TENSORPIPE_CUDA_SOURCES),
hdrs = TENSORPIPE_CUDA_HEADERS + [":tensorpipe_cuda_config_header"], hdrs = glob(TENSORPIPE_CUDA_HEADERS) + [":tensorpipe_cuda_config_header"],
includes = [ includes = [
".", ".",
], ],
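The change above suggests TENSORPIPE_CUDA_SOURCES and TENSORPIPE_CUDA_HEADERS hold glob patterns rather than concrete file paths, so they are now expanded with glob() before being used as srcs/hdrs. A minimal illustration with a hypothetical pattern:

# Hypothetical pattern list; glob() turns patterns into the actual matching files.
EXAMPLE_CUDA_SOURCES = ["tensorpipe/channel/cuda_basic/*.cc"]

cc_library(
    name = "tensorpipe_cuda_example",          # hypothetical target
    srcs = glob(EXAMPLE_CUDA_SOURCES),
)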
View File
@ -1,3 +1,6 @@
# gpu support is not available load("@rules_cuda//cuda:defs.bzl", "cuda_library")
def cu_library(**kwargs):
pass NVCC_COPTS = ["--expt-relaxed-constexpr", "--expt-extended-lambda"]
def cu_library(name, srcs, copts = [], **kwargs):
cuda_library(name, srcs = srcs, copts = NVCC_COPTS + copts, **kwargs)
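With cu_library now forwarding to rules_cuda's cuda_library, .cu files can be compiled into ordinary Bazel C++ targets (the gloo_cuda target earlier in this change is built through exactly this macro). A minimal usage sketch; the load path, target, and file names are illustrative, since the label of this .bzl file is not shown in the hunk:

load("//tools/rules:cu.bzl", "cu_library")     # assumed location of this macro

cu_library(
    name = "example_kernels",                  # hypothetical target
    srcs = ["example_kernels.cu"],
    deps = [
        "@cuda//:cuda",
        "@cuda//:cuda_headers",
    ],
)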