Mirror of https://github.com/pytorch/pytorch.git
[reland] .github: Add initial linux CI workflow (#56280)
Summary: This reverts commit 6b5ed5ec454ecd8597ff0465305915dd1e09a805. There'll also probably be fixes here; see the diff from the original PR: https://github.com/pytorch/pytorch/compare/f2abce0...ci-all/add-initial-linux-ci-gha

Pull Request resolved: https://github.com/pytorch/pytorch/pull/56280
Reviewed By: walterddr
Differential Revision: D27826012
Pulled By: seemethere
fbshipit-source-id: 71cad1d7f840ede5025b1bb4a33d628aa74686d1
Committed by: Facebook GitHub Bot
Parent: 0917061f43
Commit: 31677c5fcb
.github/scripts/generate_linux_ci_workflows.py (vendored, new executable file, 164 lines)
@@ -0,0 +1,164 @@
#!/usr/bin/env python

from pathlib import Path

import jinja2

DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"

GITHUB_DIR = Path(__file__).parent.parent

CPU_TEST_RUNNER = "linux.2xlarge"
CUDA_TEST_RUNNER = "linux.8xlarge.nvidia.gpu"


class PyTorchLinuxWorkflow:
    def __init__(self, build_environment: str, docker_image_base: str):
        self.build_environment = build_environment
        self.docker_image_base = docker_image_base
        self.test_runner_type = CPU_TEST_RUNNER
        if "cuda" in build_environment:
            self.test_runner_type = CUDA_TEST_RUNNER

    def generate_workflow_file(
        self, workflow_template: jinja2.Template, jinja_env: jinja2.Environment
    ) -> Path:
        output_file_path = GITHUB_DIR.joinpath(
            f"workflows/{self.build_environment}.yml"
        )
        with open(output_file_path, "w") as output_file:
            output_file.write(
                workflow_template.render(
                    build_environment=self.build_environment,
                    docker_image_base=self.docker_image_base,
                    test_runner_type=self.test_runner_type
                )
            )
            output_file.write('\n')
        return output_file_path


WORKFLOWS = [
    PyTorchLinuxWorkflow(
        build_environment="pytorch-linux-xenial-py3.6-gcc5.4",
        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-paralleltbb-linux-xenial-py3.6-gcc5.4",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-parallelnative-linux-xenial-py3.6-gcc5.4",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-pure_torch-linux-xenial-py3.6-gcc5.4",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3.6-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-asan",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang7-onnx",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-bionic-py3.6-clang9-noarch",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-xla-linux-bionic-py3.6-clang9",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-vulkan-linux-bionic-py3.6-clang9",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-bionic-rocm3.9-py3.6",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm3.9-py3.6",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-dynamic",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile-custom-static",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-mobile-code-analysis",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
    # PyTorchLinuxWorkflow(
    #     build_environment="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    # ),
]


if __name__ == "__main__":
    jinja_env = jinja2.Environment(
        variable_start_string="!{{",
        loader=jinja2.FileSystemLoader(str(GITHUB_DIR.joinpath("templates"))),
    )
    workflow_template = jinja_env.get_template("linux_ci_workflow.yml.in")
    for workflow in WORKFLOWS:
        print(
            workflow.generate_workflow_file(
                workflow_template=workflow_template,
                jinja_env=jinja_env
            )
        )
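The script is meant to be run from the repository root whenever the template changes; the lint job and the Makefile target below both drive it the same way. A minimal sketch of that loop (assuming Jinja2 is installed):

    pip install Jinja2                                # the generator's only third-party dependency
    ./.github/scripts/generate_linux_ci_workflows.py  # prints the path of each regenerated workflow
    ./.github/scripts/report_git_status.sh            # fails if the checked-in workflows are now stale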
.github/scripts/install_nvidia_utils_linux.sh (vendored, new executable file, 43 lines)
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

set -euo pipefail

DISTRIBUTION=$(. /etc/os-release; echo "$ID$VERSION_ID")
DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"

install_nvidia_docker2_amzn2() {
    (
        set -x
        # Needed for yum-config-manager
        sudo yum install -y yum-utils
        sudo yum-config-manager --add-repo "${YUM_REPO_URL}"
        sudo yum install -y nvidia-docker2
        sudo systemctl restart docker
    )
}

install_nvidia_driver() {
    (
        set -x
        sudo yum groupinstall -y "Development Tools"
        curl -fsL -o nvidia_driver "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
        sudo /bin/bash nvidia_driver -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
        nvidia-smi
    )
}

# Install container toolkit based on distribution
echo "== Installing nvidia container toolkit for ${DISTRIBUTION} =="
case "${DISTRIBUTION}" in
    amzn*)
        install_nvidia_docker2_amzn2
        ;;
    *)
        echo "ERROR: Unknown distribution ${DISTRIBUTION}"
        exit 1
        ;;
esac

echo "== Installing nvidia driver ${DRIVER_FN} =="
install_nvidia_driver
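The case statement keys off DISTRIBUTION, which is just ID and VERSION_ID from /etc/os-release concatenated. A small sketch of what that evaluates to on an Amazon Linux 2 runner (assumed os-release values):

    # Amazon Linux 2 ships /etc/os-release with ID="amzn" and VERSION_ID="2"
    DISTRIBUTION=$(. /etc/os-release; echo "$ID$VERSION_ID")
    echo "${DISTRIBUTION}"   # -> amzn2, which matches the amzn*) branch above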
.github/scripts/report_git_status.sh (vendored, new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
CHANGES=$(git status --porcelain)
echo "$CHANGES"
git diff
[ -z "$CHANGES" ]
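This is the assertion helper the lint workflow switches to below: it prints any pending changes and the diff for debugging, and its exit status is simply that of [ -z "$CHANGES" ]. A usage sketch:

    # Exits 0 only when `git status --porcelain` reports a clean tree, so a
    # stale generated file fails whichever CI step calls the script.
    if ./.github/scripts/report_git_status.sh; then
        echo "generated files are up to date"
    fi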
.github/templates/linux_ci_workflow.yml.in (vendored, new file, 193 lines)
@@ -0,0 +1,193 @@
# @generated by .github/scripts/generate_linux_ci_workflows.py, do not update manually
#
# Template is at:    .github/templates/linux_ci_workflow.yml.in
# Generation script: .github/scripts/generate_linux_ci_workflows.py
name: Linux CI (!{{ build_environment }})

on:
  # TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
  # pull_request:
  push:
    branches:
      - master
      - release/*
  workflow_dispatch:

env:
  BUILD_ENVIRONMENT: !{{ build_environment }}
  DOCKER_IMAGE_BASE: !{{ docker_image_base }}
  SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
  TORCH_CUDA_ARCH_LIST: 5.2
  IN_CI: 1

jobs:
  calculate-docker-image:
    runs-on: ubuntu-18.04
    outputs:
      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
    steps:
      - name: Checkout PyTorch
        uses: actions/checkout@v2
      - name: Calculate docker image tag
        id: calculate-tag
        run: |
          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
  build:
    runs-on: linux.2xlarge
    needs: calculate-docker-image
    env:
      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
    steps:
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
      - name: Checkout PyTorch
        uses: actions/checkout@v2
        with:
          fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
          submodules: recursive
      - name: Log in to ECR
        run: |
          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
          bash /tmp/ecr-login.sh
          rm /tmp/ecr-login.sh
      - name: Pull docker image
        run: |
          docker pull "${DOCKER_IMAGE}"
      - name: Create test binary build directories
        run: |
          mkdir -pv ../custom-op-build
          mkdir -pv ../custom-backend-build
          mkdir -pv ../jit-hook-build
      - name: Build PyTorch
        run: |
          SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
          MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs; if we use all of them we'll OOM
          export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
          # Why the three volume mounts here? So test binaries are put in the correct spot
          # NOTE: You cannot volume mount ${GITHUB_WORKSPACE}/..:/var/lib/jenkins since the sccache connection will hang
          # See CUSTOM_OP_BUILD, JIT_HOOK_BUILD, CUSTOM_BACKEND_BUILD
          # TODO: Stop building test binaries as part of the build phase
          docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS \
            -e SCCACHE_BUCKET \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e TORCH_CUDA_ARCH_LIST \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -v "${GITHUB_WORKSPACE}/../custom-op-build:/var/lib/jenkins/custom-op-build" \
            -v "${GITHUB_WORKSPACE}/../custom-backend-build:/var/lib/jenkins/custom-backend-build" \
            -v "${GITHUB_WORKSPACE}/../jit-hook-build:/var/lib/jenkins/jit-hook-build" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}" \
            sh -c 'sudo chown -R jenkins ../ && .jenkins/pytorch/build.sh'
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)/../":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
      - name: Archive artifacts into zip
        run: |
          (cd "${GITHUB_WORKSPACE}/../" && zip -r pytorch/artifacts.zip pytorch/dist pytorch/build custom-op-build/ custom-backend-build/ jit-hook-build/)
      - uses: actions/upload-artifact@v2
        name: Store PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
          retention-days: 30
          if-no-files-found: error
          path: artifacts.zip
      - name: Clean up docker images
        if: always()
        run: |
          # Prune all of the docker images
          docker system prune -af
  test:
    runs-on: !{{ test_runner_type }}
    needs:
      - calculate-docker-image
      - build
    env:
      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
    steps:
      - name: Chown workspace
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
      - name: Checkout PyTorch
        uses: actions/checkout@v2
      - name: Log in to ECR
        run: |
          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
          bash /tmp/ecr-login.sh
          rm /tmp/ecr-login.sh
      - name: Pull docker image
        run: |
          docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') }}
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
          case "${BUILD_ENVIRONMENT}" in
            *cuda*)
              shm_size="2g"
              ;;
            *rocm*)
              shm_size="8g"
              ;;
          esac
          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
      - uses: actions/download-artifact@v2
        name: Download PyTorch Build Artifacts
        with:
          name: ${{ env.BUILD_ENVIRONMENT }}
      - name: Unzip artifacts
        run: |
          (cd "${GITHUB_WORKSPACE}/../" && unzip -q pytorch/artifacts.zip)
      - name: Output disk space left
        run: |
          sudo df -H
      - name: Test PyTorch
        run: |
          SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
          MEMORY_LIMIT_MAX_JOBS=8 # our "linux.2xlarge" runner has 16 vCPUs; if we use all of them we'll OOM
          export MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
          # Why the three volume mounts here? So test binaries are put in the correct spot
          # NOTE: You cannot volume mount ${GITHUB_WORKSPACE}/..:/var/lib/jenkins since the sccache connection will hang
          # See CUSTOM_OP_BUILD, JIT_HOOK_BUILD, CUSTOM_BACKEND_BUILD
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086
          docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e IN_CI \
            -e MAX_JOBS \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -v "${GITHUB_WORKSPACE}/../custom-op-build:/var/lib/jenkins/custom-op-build" \
            -v "${GITHUB_WORKSPACE}/../custom-backend-build:/var/lib/jenkins/custom-backend-build" \
            -v "${GITHUB_WORKSPACE}/../jit-hook-build:/var/lib/jenkins/jit-hook-build" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}" \
            sh -c 'sudo chown -R jenkins ../ && pip install dist/*.whl && .jenkins/pytorch/test.sh'
      - name: Clean up docker images
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v alpine chown -R "$(id -u):$(id -g)" .
          # Prune all of the docker images
          docker system prune -af
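One detail of the build and test steps worth spelling out: MAX_JOBS is effectively min(nproc - 1, 8), computed with a ternary inside shell arithmetic. Worked through for the 16-vCPU linux.2xlarge runner mentioned in the comment:

    SCCACHE_MAX_JOBS=$(( 16 - 1 ))   # $(nproc) - 1 = 15 on a 16-vCPU runner
    MEMORY_LIMIT_MAX_JOBS=8
    MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
    echo "${MAX_JOBS}"               # 8: the memory cap wins over the CPU count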
.github/workflows/lint.yml (vendored, 24 lines changed)
@@ -95,6 +95,23 @@ jobs:
         run: |
           python2 setup.py | grep "Python 2 has reached end-of-life and is no longer supported by PyTorch."
 
+  templates:
+    runs-on: ubuntu-18.04
+    steps:
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.x
+          architecture: x64
+      - name: Install Jinja2
+        run: pip install Jinja2
+      - name: Checkout PyTorch
+        uses: actions/checkout@v2
+      - name: Regenerate workflows
+        run: .github/scripts/generate_linux_ci_workflows.py
+      - name: Assert that regenerating the workflows didn't change them
+        run: .github/scripts/report_git_status.sh
+
   toc:
     runs-on: ubuntu-18.04
     # https://github.com/actions/virtual-environments/issues/599#issuecomment-602754687
@@ -115,12 +132,7 @@
            markdown-toc --bullets='-' -i "$FILE"
          done
       - name: Assert that regenerating the ToCs didn't change them
-        run: |
-          set -eux
-          CHANGES=$(git status --porcelain)
-          echo "$CHANGES"
-          git diff
-          [ -z "$CHANGES" ]
+        run: .github/scripts/report_git_status.sh
 
   flake8-py3:
     runs-on: ubuntu-18.04
.github/workflows/pytorch-linux-xenial-py3.6-gcc5.4.yml (vendored, new file, 193 lines)
@@ -0,0 +1,193 @@
(Generated output, omitted here: identical to the linux_ci_workflow.yml.in template above, with build_environment=pytorch-linux-xenial-py3.6-gcc5.4, docker_image_base=308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4, and test_runner_type=linux.2xlarge substituted for the !{{ }} placeholders.)
.gitignore (vendored, 9 lines added)
@@ -292,3 +292,12 @@ bazel-*
 # direnv, posh-direnv
 .envrc
 .psenvrc
+
+# generated shellcheck directories
+.shellcheck_generated*/
+
+# zip archives
+*.zip
+
+# core dump files
+core.*
.jenkins/pytorch/build.sh
@@ -59,6 +59,17 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
   export BUILD_SPLIT_CUDA=ON
 fi
 
+if [[ ${BUILD_ENVIRONMENT} == *"pure_torch"* ]]; then
+  export BUILD_CAFFE2=OFF
+fi
+
+if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
+  export ATEN_THREADING=TBB
+  export USE_TBB=1
+elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
+  export ATEN_THREADING=NATIVE
+fi
+
 # TODO: Don't run this...
 pip_install -r requirements.txt || true
 
@@ -234,7 +245,7 @@ else
   CUSTOM_OP_TEST="$PWD/test/custom_operator"
   python --version
   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
-  mkdir "$CUSTOM_OP_BUILD"
+  mkdir -p "$CUSTOM_OP_BUILD"
   pushd "$CUSTOM_OP_BUILD"
   cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
   make VERBOSE=1
@@ -246,7 +257,7 @@ else
   JIT_HOOK_TEST="$PWD/test/jit_hooks"
   python --version
   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
-  mkdir "$JIT_HOOK_BUILD"
+  mkdir -p "$JIT_HOOK_BUILD"
   pushd "$JIT_HOOK_BUILD"
   cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
   make VERBOSE=1
@@ -257,7 +268,7 @@ else
   CUSTOM_BACKEND_BUILD="$PWD/../custom-backend-build"
   CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
   python --version
-  mkdir "$CUSTOM_BACKEND_BUILD"
+  mkdir -p "$CUSTOM_BACKEND_BUILD"
   pushd "$CUSTOM_BACKEND_BUILD"
   cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)"
   make VERBOSE=1
.jenkins/pytorch/common.sh
@@ -72,7 +72,16 @@ if [[ "$BUILD_ENVIRONMENT" != *pytorch-win-* ]]; then
   # Save sccache logs to file
   sccache --stop-server || true
   rm ~/sccache_error.log || true
-  if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
+  if [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]]; then
+    # sccache --start-server seems to hang forever on self hosted runners for GHA
+    # so let's just go ahead and skip the --start-server altogether since it seems
+    # as though sccache still gets used even when the sccache server isn't started
+    # explicitly
+    echo "Skipping sccache server initialization, setting environment variables"
+    export SCCACHE_IDLE_TIMEOUT=1200
+    export SCCACHE_ERROR_LOG=~/sccache_error.log
+    export RUST_LOG=sccache::server=error
+  elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
     SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
   else
     # increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:
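The new first branch keys off an environment variable rather than the build environment name; the GitHub Actions build job opts in by passing -e SKIP_SCCACHE_INITIALIZATION=1 into the container (see the docker run flags in the workflow above). A quick sketch of how the guard behaves:

    unset SKIP_SCCACHE_INITIALIZATION
    [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]] && echo "skip"   # prints nothing
    export SKIP_SCCACHE_INITIALIZATION=1
    [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]] && echo "skip"   # prints "skip"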
.jenkins/pytorch/macos-test.sh
@@ -51,7 +51,11 @@ test_python_all() {
   export GLOO_SOCKET_IFNAME=lo0
   echo "Ninja version: $(ninja --version)"
 
-  if [ -n "$CIRCLE_PULL_REQUEST" ]; then
+  # Try to pull value from CIRCLE_PULL_REQUEST first then GITHUB_HEAD_REF second
+  # CIRCLE_PULL_REQUEST comes from CircleCI
+  # GITHUB_HEAD_REF comes from Github Actions
+  IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
+  if [ -n "$IN_PULL_REQUEST" ]; then
     DETERMINE_FROM=$(mktemp)
     file_diff_from_base "$DETERMINE_FROM"
   fi
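The same CI-agnostic fallback recurs in the test scripts below: the nested ${var:-default} expansion tries CIRCLE_PULL_REQUEST first, then GITHUB_HEAD_REF, and defaults to empty so the -n test stays well-defined. A short demonstration (hypothetical branch name):

    unset CIRCLE_PULL_REQUEST GITHUB_HEAD_REF
    IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
    echo "'${IN_PULL_REQUEST}'"    # '' on a push build
    export GITHUB_HEAD_REF="my-feature-branch"
    IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
    echo "'${IN_PULL_REQUEST}'"    # 'my-feature-branch' on a GitHub Actions PR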
.jenkins/pytorch/test.sh
@@ -115,7 +115,11 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* ]]; then
   export ATEN_CPU_CAPABILITY=avx
 fi
 
-if [ -n "$CIRCLE_PULL_REQUEST" ] && [[ "$BUILD_ENVIRONMENT" != *coverage* ]]; then
+# Try to pull value from CIRCLE_PULL_REQUEST first then GITHUB_HEAD_REF second
+# CIRCLE_PULL_REQUEST comes from CircleCI
+# GITHUB_HEAD_REF comes from Github Actions
+IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
+if [ -n "$IN_PULL_REQUEST" ] && [[ "$BUILD_ENVIRONMENT" != *coverage* ]]; then
   DETERMINE_FROM=$(mktemp)
   file_diff_from_base "$DETERMINE_FROM"
 fi
.jenkins/pytorch/win-test.sh
@@ -42,12 +42,16 @@ fi
 
 export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
 
-if [ -n "$CIRCLE_PULL_REQUEST" ]; then
+# Try to pull value from CIRCLE_PULL_REQUEST first then GITHUB_HEAD_REF second
+# CIRCLE_PULL_REQUEST comes from CircleCI
+# GITHUB_HEAD_REF comes from Github Actions
+IN_PULL_REQUEST=${CIRCLE_PULL_REQUEST:-${GITHUB_HEAD_REF:-}}
+if [ -n "$IN_PULL_REQUEST" ]; then
   DETERMINE_FROM="${TMP_DIR}/determine_from"
   file_diff_from_base "$DETERMINE_FROM"
 fi
 
-if [[ "${CIRCLE_JOB}" == *11* ]]; then
+if [[ "${BUILD_ENVIRONMENT}" == *cuda11* ]]; then
   export BUILD_SPLIT_CUDA=ON
 fi
Makefile (11 lines added)
@@ -14,8 +14,19 @@ ios:
 
 clean: # This will remove ALL build folders.
 	@rm -r build*/
+	@$(RM) -r $(SHELLCHECK_GHA_GENERATED_FOLDER)
 
 linecount:
 	@cloc --read-lang-def=caffe.cloc caffe2 || \
 		echo "Cloc is not available on the machine. You can install cloc with " && \
 		echo " sudo apt-get install cloc"
+
+SHELLCHECK_GHA_GENERATED_FOLDER=.shellcheck_generated_gha
+shellcheck-gha:
+	@$(RM) -r $(SHELLCHECK_GHA_GENERATED_FOLDER)
+	tools/extract_scripts.py --out=$(SHELLCHECK_GHA_GENERATED_FOLDER)
+	tools/run_shellcheck.sh $(SHELLCHECK_GHA_GENERATED_FOLDER)
+
+generate-gha-workflows:
+	./.github/scripts/generate_linux_ci_workflows.py
+	$(MAKE) shellcheck-gha
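For local development this adds a one-command path that regenerates the workflows and then shellchecks the scripts extracted from them (assuming shellcheck and Jinja2 are available locally):

    make generate-gha-workflows   # regenerate .github/workflows/*.yml, then run shellcheck-gha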