[reland] .github: Add initial linux CI workflow (#56280)

Summary:
This reverts commit 6b5ed5ec454ecd8597ff0465305915dd1e09a805.

There'll also probably be fixes here, see diff from original PR: https://github.com/pytorch/pytorch/compare/f2abce0...ci-all/add-initial-linux-ci-gha

Pull Request resolved: https://github.com/pytorch/pytorch/pull/56280

Reviewed By: walterddr

Differential Revision: D27826012

Pulled By: seemethere

fbshipit-source-id: 71cad1d7f840ede5025b1bb4a33d628aa74686d1
This commit is contained in:
Eli Uriegas
2021-04-19 17:34:50 -07:00
committed by Facebook GitHub Bot
parent 0917061f43
commit 31677c5fcb
13 changed files with 676 additions and 14 deletions

164
.github/scripts/generate_linux_ci_workflows.py vendored Executable file
View File

@ -0,0 +1,164 @@
#!/usr/bin/env python
from pathlib import Path
import jinja2
DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
GITHUB_DIR = Path(__file__).parent.parent
CPU_TEST_RUNNER = "linux.2xlarge"
CUDA_TEST_RUNNER = "linux.8xlarge.nvidia.gpu"
class PyTorchLinuxWorkflow:
def __init__(self, build_environment: str, docker_image_base: str):
self.build_environment = build_environment
self.docker_image_base = docker_image_base
self.test_runner_type = CPU_TEST_RUNNER
if "cuda" in build_environment:
self.test_runner_type = CUDA_TEST_RUNNER
def generate_workflow_file(
self, workflow_template: jinja2.Template, jinja_env: jinja2.Environment
) -> Path:
output_file_path = GITHUB_DIR.joinpath(
f"workflows/{self.build_environment}.yml"
)
with open(output_file_path, "w") as output_file:
output_file.write(
workflow_template.render(
build_environment=self.build_environment,
docker_image_base=self.docker_image_base,
test_runner_type=self.test_runner_type
)
)
output_file.write('\n')
return output_file_path
WORKFLOWS = [
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-xenial-py3.6-gcc5.4",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-paralleltbb-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-parallelnative-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-pure_torch-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-asan",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang7-onnx",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.6-clang9-noarch",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-xla-linux-bionic-py3.6-clang9",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-vulkan-linux-bionic-py3.6-clang9",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-rocm3.9-py3.6",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm3.9-py3.6",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-dynamic",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-static",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-mobile-code-analysis",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# ),
]
if __name__ == "__main__":
jinja_env = jinja2.Environment(
variable_start_string="!{{",
loader=jinja2.FileSystemLoader(str(GITHUB_DIR.joinpath("templates"))),
)
workflow_template = jinja_env.get_template("linux_ci_workflow.yml.in")
for workflow in WORKFLOWS:
print(
workflow.generate_workflow_file(
workflow_template=workflow_template,
jinja_env=jinja_env
)
)

43
.github/scripts/install_nvidia_utils_linux.sh vendored Executable file
View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
set -eou pipefail
DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID) \
DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"
install_nvidia_docker2_amzn2() {
(
set -x
# Needed for yum-config-manager
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo "${YUM_REPO_URL}"
sudo yum install -y nvidia-docker2
sudo systemctl restart docker
)
}
install_nvidia_driver() {
(
set -x
sudo yum groupinstall -y "Development Tools"
curl -fsL -o nvidia_driver "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
sudo /bin/bash nvidia_driver -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
nvidia-smi
)
}
# Install container toolkit based on distribution
echo "== Installing nvidia container toolkit for ${DISTRIBUTION} =="
case "${DISTRIBUTION}" in
amzn*)
install_nvidia_docker2_amzn2
;;
*)
echo "ERROR: Unknown distribution ${DISTRIBUTION}"
exit 1
;;
esac
echo "== Installing nvidia driver ${DRIVER_FN} =="
install_nvidia_driver

5
.github/scripts/report_git_status.sh vendored Executable file
View File

@ -0,0 +1,5 @@
#!/usr/bin/env bash
CHANGES=$(git status --porcelain)
echo "$CHANGES"
git diff
[ -z "$CHANGES" ]

View File

@ -0,0 +1,193 @@
# @generated by .github/scripts/generate_linux_ci_workflows.py, Do not update manually
#
# Template is at: .github/templates/linux_ci_workflow.yml
# Generation script: .github/scripts/generate_linux_ci_workflows.py
name: Linux CI (!{{ build_environment }})
on:
# TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
# pull_request:
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: !{{ build_environment }}
DOCKER_IMAGE_BASE: !{{ docker_image_base }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
jobs:
calculate-docker-image:
runs-on: ubuntu-18.04
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
build:
runs-on: linux.2xlarge
needs: calculate-docker-image
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
steps:
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
submodules: recursive
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Create test binary build directories
run: |
mkdir -pv ../custom-op-build
mkdir -pv ../custom-backend-build
mkdir -pv ../jit-hook-build
- name: Build PyTorch
run: |
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
# Why the three volume mounts here? So test binaries are put in the correct spot
# NOTE: You cannot volume mount ${GITHUB_WORKSPACE}../:/var/lib/jenkins since sccache connection will hang
# See CUSTOM_OP_BUILD, JIT_HOOK_BUILD, CUSTOM_BACKEND_BUILD
# TODO: Stop building test binaries as part of the build phase
docker run \
-e BUILD_ENVIRONMENT \
-e MAX_JOBS \
-e SCCACHE_BUCKET \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-v "${GITHUB_WORKSPACE}../custom-op-build:/var/lib/jenkins/custom-op-build" \
-v "${GITHUB_WORKSPACE}../custom-backend-build:/var/lib/jenkins/custom-backend-build" \
-v "${GITHUB_WORKSPACE}../jit-hook-build:/var/lib/jenkins/jit-hook-build" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins ../ && .jenkins/pytorch/build.sh'
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)/../":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
(cd "${GITHUB_WORKSPACE}/../" && zip -r pytorch/artifacts.zip pytorch/dist pytorch/build custom-op-build/ custom-backend-build/ jit-hook-build/)
- uses: actions/upload-artifact@v2
name: Store PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 30
if-no-files-found: error
path:
artifacts.zip
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
test:
runs-on: !{{ test_runner_type }}
needs:
- calculate-docker-image
- build
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
steps:
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: actions/download-artifact@v2
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
(cd "${GITHUB_WORKSPACE}/../" && unzip -q pytorch/artifacts.zip)
- name: Output disk space left
run: |
sudo df -H
- name: Test PyTorch
run: |
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
# Why the three volume mounts here? So test binaries are put in the correct spot
# NOTE: You cannot volume mount ${GITHUB_WORKSPACE}../:/var/lib/jenkins since sccache connection will hang
# See CUSTOM_OP_BUILD, JIT_HOOK_BUILD, CUSTOM_BACKEND_BUILD
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086
docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e IN_CI \
-e MAX_JOBS \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-v "${GITHUB_WORKSPACE}../custom-op-build:/var/lib/jenkins/custom-op-build" \
-v "${GITHUB_WORKSPACE}../custom-backend-build:/var/lib/jenkins/custom-backend-build" \
-v "${GITHUB_WORKSPACE}../jit-hook-build:/var/lib/jenkins/jit-hook-build" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins ../ && pip install dist/*.whl && .jenkins/pytorch/test.sh'
- name: Clean up docker images
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
# Prune all of the docker images
docker system prune -af

View File

@ -95,6 +95,23 @@ jobs:
run: |
python2 setup.py | grep "Python 2 has reached end-of-life and is no longer supported by PyTorch."
templates:
runs-on: ubuntu-18.04
steps:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.x
architecture: x64
- name: Install Jinja2
run: pip install Jinja2
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Regenerate workflows
run: .github/scripts/generate_linux_ci_workflows.py
- name: Assert that regenerating the workflows didn't change them
run: .github/scripts/report_git_status.sh
toc:
runs-on: ubuntu-18.04
# https://github.com/actions/virtual-environments/issues/599#issuecomment-602754687
@ -115,12 +132,7 @@ jobs:
markdown-toc --bullets='-' -i "$FILE"
done
- name: Assert that regenerating the ToCs didn't change them
run: |
set -eux
CHANGES=$(git status --porcelain)
echo "$CHANGES"
git diff
[ -z "$CHANGES" ]
run: .github/scripts/report_git_status.sh
flake8-py3:
runs-on: ubuntu-18.04

View File

@ -0,0 +1,193 @@
# @generated by .github/scripts/generate_linux_ci_workflows.py, Do not update manually
#
# Template is at: .github/templates/linux_ci_workflow.yml
# Generation script: .github/scripts/generate_linux_ci_workflows.py
name: Linux CI (pytorch-linux-xenial-py3.6-gcc5.4)
on:
# TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
# pull_request:
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: pytorch-linux-xenial-py3.6-gcc5.4
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
jobs:
calculate-docker-image:
runs-on: ubuntu-18.04
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
build:
runs-on: linux.2xlarge
needs: calculate-docker-image
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
steps:
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
submodules: recursive
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Create test binary build directories
run: |
mkdir -pv ../custom-op-build
mkdir -pv ../custom-backend-build
mkdir -pv ../jit-hook-build
- name: Build PyTorch
run: |
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
# Why the three volume mounts here? So test binaries are put in the correct spot
# NOTE: You cannot volume mount ${GITHUB_WORKSPACE}../:/var/lib/jenkins since sccache connection will hang
# See CUSTOM_OP_BUILD, JIT_HOOK_BUILD, CUSTOM_BACKEND_BUILD
# TODO: Stop building test binaries as part of the build phase
docker run \
-e BUILD_ENVIRONMENT \
-e MAX_JOBS \
-e SCCACHE_BUCKET \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-v "${GITHUB_WORKSPACE}../custom-op-build:/var/lib/jenkins/custom-op-build" \
-v "${GITHUB_WORKSPACE}../custom-backend-build:/var/lib/jenkins/custom-backend-build" \
-v "${GITHUB_WORKSPACE}../jit-hook-build:/var/lib/jenkins/jit-hook-build" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins ../ && .jenkins/pytorch/build.sh'
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)/../":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
(cd "${GITHUB_WORKSPACE}/../" && zip -r pytorch/artifacts.zip pytorch/dist pytorch/build custom-op-build/ custom-backend-build/ jit-hook-build/)
- uses: actions/upload-artifact@v2
name: Store PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 30
if-no-files-found: error
path:
artifacts.zip
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
test:
runs-on: linux.2xlarge
needs:
- calculate-docker-image
- build
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
steps:
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: actions/download-artifact@v2
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
(cd "${GITHUB_WORKSPACE}/../" && unzip -q pytorch/artifacts.zip)
- name: Output disk space left
run: |
sudo df -H
- name: Test PyTorch
run: |
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs, if we use all of them we'll OOM
export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
# Why the three volume mounts here? So test binaries are put in the correct spot
# NOTE: You cannot volume mount ${GITHUB_WORKSPACE}../:/var/lib/jenkins since sccache connection will hang
# See CUSTOM_OP_BUILD, JIT_HOOK_BUILD, CUSTOM_BACKEND_BUILD
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086
docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e IN_CI \
-e MAX_JOBS \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-v "${GITHUB_WORKSPACE}../custom-op-build:/var/lib/jenkins/custom-op-build" \
-v "${GITHUB_WORKSPACE}../custom-backend-build:/var/lib/jenkins/custom-backend-build" \
-v "${GITHUB_WORKSPACE}../jit-hook-build:/var/lib/jenkins/jit-hook-build" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins ../ && pip install dist/*.whl && .jenkins/pytorch/test.sh'
- name: Clean up docker images
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
# Prune all of the docker images
docker system prune -af

9
.gitignore vendored
View File

@ -292,3 +292,12 @@ bazel-*
# direnv, posh-direnv
.envrc
.psenvrc
# generated shellcheck directories
.shellcheck_generated*/
# zip archives
*.zip
# core dump files
core.*

View File

@ -59,6 +59,17 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
export BUILD_SPLIT_CUDA=ON
fi
if [[ ${BUILD_ENVIRONMENT} == *"pure_torch"* ]]; then
export BUILD_CAFFE2=OFF
fi
if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
export ATEN_THREADING=TBB
export USE_TBB=1
elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
export ATEN_THREADING=NATIVE
fi
# TODO: Don't run this...
pip_install -r requirements.txt || true
@ -234,7 +245,7 @@ else
CUSTOM_OP_TEST="$PWD/test/custom_operator"
python --version
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
mkdir "$CUSTOM_OP_BUILD"
mkdir -p "$CUSTOM_OP_BUILD"
pushd "$CUSTOM_OP_BUILD"
cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
make VERBOSE=1
@ -246,7 +257,7 @@ else
JIT_HOOK_TEST="$PWD/test/jit_hooks"
python --version
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
mkdir "$JIT_HOOK_BUILD"
mkdir -p "$JIT_HOOK_BUILD"
pushd "$JIT_HOOK_BUILD"
cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
make VERBOSE=1
@ -257,7 +268,7 @@ else
CUSTOM_BACKEND_BUILD="$PWD/../custom-backend-build"
CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
python --version
mkdir "$CUSTOM_BACKEND_BUILD"
mkdir -p "$CUSTOM_BACKEND_BUILD"
pushd "$CUSTOM_BACKEND_BUILD"
cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
make VERBOSE=1

View File

@ -72,7 +72,16 @@ if [[ "$BUILD_ENVIRONMENT" != *pytorch-win-* ]]; then
# Save sccache logs to file
sccache --stop-server || true
rm ~/sccache_error.log || true
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
if [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]]; then
# sccache --start-server seems to hang forever on self hosted runners for GHA
# so let's just go ahead and skip the --start-server altogether since it seems
# as though sccache still gets used even when the sscache server isn't started
# explicitly
echo "Skipping sccache server initialization, setting environment variables"
export SCCACHE_IDLE_TIMEOUT=1200
export SCCACHE_ERROR_LOG=~/sccache_error.log
export RUST_LOG=sccache::server=error
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
else
# increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:

View File

@ -51,7 +51,11 @@ test_python_all() {
export GLOO_SOCKET_IFNAME=lo0
echo "Ninja version: $(ninja --version)"
if [ -n "$CIRCLE_PULL_REQUEST" ]; then
# Try to pull value from CIRCLE_PULL_REQUEST first then GITHUB_HEAD_REF second
# CIRCLE_PULL_REQUEST comes from CircleCI
# GITHUB_HEAD_REF comes from Github Actions
IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
if [ -n "$IN_PULL_REQUEST" ]; then
DETERMINE_FROM=$(mktemp)
file_diff_from_base "$DETERMINE_FROM"
fi

View File

@ -115,7 +115,11 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* ]]; then
export ATEN_CPU_CAPABILITY=avx
fi
if [ -n "$CIRCLE_PULL_REQUEST" ] && [[ "$BUILD_ENVIRONMENT" != *coverage* ]]; then
# Try to pull value from CIRCLE_PULL_REQUEST first then GITHUB_HEAD_REF second
# CIRCLE_PULL_REQUEST comes from CircleCI
# GITHUB_HEAD_REF comes from Github Actions
IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
if [ -n "$IN_PULL_REQUEST" ] && [[ "$BUILD_ENVIRONMENT" != *coverage* ]]; then
DETERMINE_FROM=$(mktemp)
file_diff_from_base "$DETERMINE_FROM"
fi

View File

@ -42,12 +42,16 @@ fi
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
if [ -n "$CIRCLE_PULL_REQUEST" ]; then
# Try to pull value from CIRCLE_PULL_REQUEST first then GITHUB_HEAD_REF second
# CIRCLE_PULL_REQUEST comes from CircleCI
# GITHUB_HEAD_REF comes from Github Actions
IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
if [ -n "$IN_PULL_REQUEST" ]; then
DETERMINE_FROM="${TMP_DIR}/determine_from"
file_diff_from_base "$DETERMINE_FROM"
fi
if [[ "${CIRCLE_JOB}" == *11* ]]; then
if [[ "${BUILD_ENVIRONMENT}" == *cuda11* ]]; then
export BUILD_SPLIT_CUDA=ON
fi

View File

@ -14,8 +14,19 @@ ios:
clean: # This will remove ALL build folders.
@rm -r build*/
@$(RM) -r $(SHELLCHECK_GHA_GENERATED_FOLDER)
linecount:
@cloc --read-lang-def=caffe.cloc caffe2 || \
echo "Cloc is not available on the machine. You can install cloc with " && \
echo " sudo apt-get install cloc"
SHELLCHECK_GHA_GENERATED_FOLDER=.shellcheck_generated_gha
shellcheck-gha:
@$(RM) -r $(SHELLCHECK_GHA_GENERATED_FOLDER)
tools/extract_scripts.py --out=$(SHELLCHECK_GHA_GENERATED_FOLDER)
tools/run_shellcheck.sh $(SHELLCHECK_GHA_GENERATED_FOLDER)
generate-gha-workflows:
./.github/scripts/generate_linux_ci_workflows.py
$(MAKE) shellcheck-gha