Compare commits


6 Commits

SHA1 Message Date
7cedac341d Merge branch 'master' into issue#58739 2021-06-17 15:55:42 +02:00
7781b9f16f add blank lines for readability 2021-06-17 15:55:35 +02:00
2119e032fb revert submodule update 2021-06-17 15:54:36 +02:00
8a2bcf9ebf revert submodule update 2021-06-17 16:40:38 +05:30
1ee019fa52 reverted changes 2021-06-17 16:13:25 +05:30
0864aaaeb7 add support for constant 2021-06-17 14:00:51 +05:30
3701 changed files with 89994 additions and 186680 deletions

View File

@ -44,7 +44,7 @@ jobs:
is_official_build: ${{ parameters.is_official_build}}
# Sync and update PyTorch submodules
- bash: git submodule update --init --recursive --jobs 0
- bash: git submodule update --init --recursive
displayName: Update PyTorch submodules
# Build PyTorch and run unit tests - no packaging
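
Aside (git behavior, not shown in this diff): the reverted `--jobs 0` flag asks git to pick a reasonable parallel fetch count via `submodule.fetchJobs`; a hedged sketch of the equivalent spellings:

```
# Parallel submodule fetches (sketch; semantics per the git-submodule docs)
git submodule update --init --recursive --jobs 0                   # git picks the job count
git -c submodule.fetchJobs=0 submodule update --init --recursive   # same, via config
```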

View File

@ -47,7 +47,7 @@ jobs:
is_official_build: ${{ parameters.is_official_build}}
# Sync and update PyTorch submodules
- script: git submodule update --init --recursive --jobs 0
- script: git submodule update --init --recursive
displayName: Update PyTorch submodules
# Build PyTorch and run unit tests - no packaging

View File

@ -1,26 +0,0 @@
parameters:
name: ''
pool: ''
customMatrixes: ''
jobs:
- job: ${{parameters.name}}
timeoutInMinutes: 600
strategy:
matrix:
${{ insert }}: ${{parameters.customMatrixes}}
pool:
name: ${{ parameters.pool}}
steps:
# Clone PyTorch Tests repository
- bash: |
B64_PAT=$(echo -n ":$_ADOTOKEN" | base64)
git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone $(AZURE_DEVOPS_PYTORCH_TESTS_REPO_URL)
cd pytorch_tests
git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
env:
_ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
displayName: Clone PyTorch Tests repo
- bash: |
bash $(Build.SourcesDirectory)/pytorch_tests/webapp/notify_webapp.sh
displayName: Notify Webapp

View File

@ -33,7 +33,7 @@ jobs:
# Clone PyTorch Tests repository
- bash: |
B64_PAT=$(echo -n ":$_ADOTOKEN" | base64)
B64_PAT=$(printf "%s"":$_ADOTOKEN" | base64)
git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone $(AZURE_DEVOPS_PYTORCH_TESTS_REPO_URL)
cd pytorch_tests
git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
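
Context for the `echo -n` → `printf` change above: `echo -n` is not portable (some shells print a literal `-n`), while `printf "%s"` emits exactly its argument with no trailing newline, which matters when the string is base64-encoded into an auth header. A minimal sketch, with the token and URL as hypothetical placeholders:

```
# Build a Basic-auth header value from a PAT without a stray newline (sketch)
PAT="example-token"                       # hypothetical placeholder
B64_PAT=$(printf "%s" ":$PAT" | base64)
git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone https://example.invalid/repo.git
```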

View File

@ -8,7 +8,7 @@ steps:
connectionType: 'connectedServiceName'
serviceConnection: circleciconn
method: 'POST'
headers: '{"Content-Type":"application/json", "BranchName":"$(_TARGET_BRANCH_TO_CHECK)", "JobName":"$(TARGET_CIRCLECI_BUILD_PR)", "PRNumber":"$(_TARGET_PR_NUMBER)", "TargetCommit":"$(_TARGET_COMMIT)", "PlanUrl":"$(System.CollectionUri)", "ProjectId":"$(System.TeamProjectId)", "HubName":"$(System.HostType)", "PlanId":"$(System.PlanId)", "JobId":"$(System.JobId)", "TimelineId":"$(System.TimelineId)", "TaskInstanceId":"$(System.TaskInstanceId)", "AuthToken":"$(System.AccessToken)"}'
headers: '{"Content-Type":"application/json", "BranchName":"$(_TARGET_BRANCH_TO_CHECK)", "JobName":"$(TARGET_CIRCLECI_BUILD_PR)", "PRNumber":"$(_NUMBER_BUILD_PR)", "TargetCommit":"$(_TARGET_COMMIT)", "PlanUrl":"$(System.CollectionUri)", "ProjectId":"$(System.TeamProjectId)", "HubName":"$(System.HostType)", "PlanId":"$(System.PlanId)", "JobId":"$(System.JobId)", "TimelineId":"$(System.TimelineId)", "TaskInstanceId":"$(System.TaskInstanceId)", "AuthToken":"$(System.AccessToken)"}'
body: ''
urlSuffix: 'api/JobStatus'
waitForCompletion: true

View File

@ -48,13 +48,3 @@ stages:
_PYTHON_VERSION: $(PYTHON_VERSION_WIN_2)
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_2)
_RUN_TESTS: $(RUN_TESTS_WIN)
- stage: 'NotifyWebapp'
displayName: 'Notify Webapp that pipeline is finished'
dependsOn: NightlyCustomTests
condition: succeededOrFailed()
jobs:
- template: job_templates/notify-webapp-template.yml
parameters:
name: ubuntu_1804_CPU
pool: $(BUILD_POOL_LIN_1)

View File

@ -22,7 +22,7 @@ stages:
- template: job_templates/wheel-wait-template.yml
variables:
_TARGET_BRANCH_TO_CHECK: ${{parameters.GitHubPyTorchPRTrigger.TARGET_BRANCH_TO_CHECK_AZ_DEVOPS_PR}}
_TARGET_PR_NUMBER: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
_NUMBER_BUILD_PR: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
_TARGET_COMMIT: ${{parameters.GitHubPyTorchPRTrigger.TARGET_COMMIT}}
- stage: 'PRCustomTests'
@ -40,23 +40,7 @@ stages:
_CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_PR)
_TARGET_CIRCLECI_BUILD: $(TARGET_CIRCLECI_BUILD_PR)
_TARGET_BRANCH_TO_CHECK: ${{parameters.GitHubPyTorchPRTrigger.TARGET_BRANCH_TO_CHECK_AZ_DEVOPS_PR}}
_TARGET_PR_NUMBER: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
_NUMBER_BUILD_PR: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
_TARGET_COMMIT: ${{parameters.GitHubPyTorchPRTrigger.TARGET_COMMIT}}
_DOCKER_IMAGE: $(DOCKER_IMAGE_PR)
_RUN_TESTS: $(RUN_TESTS_PR)
- stage: 'NotifyWebapp'
displayName: 'Notify Webapp that pipeline is finished'
dependsOn: PRCustomTests
condition: succeededOrFailed()
jobs:
- template: job_templates/notify-webapp-template.yml
parameters:
name: ubuntu_1804_CPU
pool: $(BUILD_POOL_LIN_1)
customMatrixes:
PR_Notify_WebApp:
_TARGET_CIRCLECI_BUILD: $(TARGET_CIRCLECI_BUILD_PR)
_TARGET_BRANCH_TO_CHECK: ${{parameters.GitHubPyTorchPRTrigger.TARGET_BRANCH_TO_CHECK_AZ_DEVOPS_PR}}
_TARGET_PR_NUMBER: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
_TARGET_COMMIT: ${{parameters.GitHubPyTorchPRTrigger.TARGET_COMMIT}}

View File

@ -1,13 +1,3 @@
build --copt=--std=c++14
build --copt=-I.
build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
# Configuration to disable tty features for environments like CI
build:no-tty --curses no
build:no-tty --progress_report_interval 10
build:no-tty --show_progress_rate_limit 10
# Configuration to build with GPU support
build:gpu --define=cuda=true
# define a separate build folder for faster switching between configs
build:gpu --platform_suffix=-gpu
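
For orientation (standard Bazel semantics, not stated in the diff): the `build:NAME` lines above define config groups that apply only when a build is invoked with `--config=NAME`, and groups can be stacked:

```
# Selecting the .bazelrc config groups shown above (sketch; target name is hypothetical)
bazel build --config=gpu //:example_target
bazel build --config=gpu --config=no-tty //:example_target   # CI-style, no tty output
```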

View File

@ -1 +1 @@
4.2.1
3.1.0

View File

@ -343,6 +343,7 @@ All linux builds occur in docker images. The docker images are
* Has ALL CUDA versions installed. The script pytorch/builder/conda/switch_cuda_version.sh sets /usr/local/cuda to a symlink to e.g. /usr/local/cuda-10.0 to enable different CUDA builds
* Also used for cpu builds
* pytorch/manylinux-cuda90
* pytorch/manylinux-cuda92
* pytorch/manylinux-cuda100
* Also used for cpu builds

View File

@ -126,6 +126,9 @@ class PackageFormatConfigNode(ConfigNode):
self.props["python_versions"] = python_versions
self.props["package_format"] = package_format
# XXX Disabling conda for 11.3 as there's currently no appropriate cudatoolkit available
if package_format == "conda":
self.props["gpu_versions"] = filter(lambda x: x != "cuda113", self.find_prop("gpu_versions"))
def get_children(self):
if self.find_prop("os_name") == "linux":

View File

@ -124,9 +124,9 @@ class Conf(object):
Output looks similar to:
- binary_upload:
name: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_upload
name: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_upload
context: org-member
requires: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
requires: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_test
filters:
branches:
only:
@ -134,7 +134,7 @@ class Conf(object):
tags:
only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
package_type: manywheel
upload_subfolder: cu113
upload_subfolder: cu92
"""
return {
"binary_upload": OrderedDict({

View File

@ -7,19 +7,26 @@ CONFIG_TREE_DATA = [
("5.4", [ # All this subtree rebases to master and then build
("3.6", [
("important", [X(True)]),
("parallel_tbb", [X(True)]),
("parallel_native", [X(True)]),
("pure_torch", [X(True)]),
]),
]),
# TODO: bring back libtorch test
("7", [X("3.6")]),
]),
("clang", [
("7", [
("5", [
("3.6", [
("asan", [
(True, [
("shard_test", [XImportant(True)]),
]),
]),
]),
]),
("7", [
("3.6", [
("onnx", [XImportant(True)]),
]),
]),
@ -27,28 +34,38 @@ CONFIG_TREE_DATA = [
("cuda", [
("10.2", [
("3.6", [
# Builds are needed for slow_gradcheck
('build_only', [X(True)]),
("shard_test", [X(True)]),
("slow_gradcheck", [
# If you update this slow gradcheck, you should
# also update docker_definitions.py to make sure
# the docker image matches the config used here
(True, [
('shard_test', [XImportant(True)]),
]),
]),
# UNCOMMENT THE BELOW TO REENABLE LIBTORCH
# ("libtorch", [
# (True, [
# ('build_only', [X(True)]),
# ]),
# ]),
("libtorch", [
(True, [
('build_only', [X(True)]),
]),
]),
]),
]),
("11.1", [
("3.8", [
("shard_test", [XImportant(True)]),
("libtorch", [
(True, [
('build_only', [X(True)]),
]),
]),
]),
]),
]),
]),
("bionic", [
("clang", [
("9", [
("3.6", [
("noarch", [XImportant(True)]),
]),
]),
("9", [
("3.6", [
("xla", [XImportant(True)]),
@ -56,14 +73,31 @@ CONFIG_TREE_DATA = [
]),
]),
]),
# @jithunnair-amd believes Jenkins builds are sufficient
# ("rocm", [
# ("3.9", [
# ("3.6", [
# ('build_only', [XImportant(True)]),
# ]),
# ]),
# ]),
("cuda", [
("10.2", [
("3.9", [
("shard_test", [XImportant(True)]),
]),
]),
]),
("gcc", [
("9", [
("3.8", [
("coverage", [
(True, [
("shard_test", [XImportant(True)]),
]),
]),
]),
]),
]),
("rocm", [
("3.9", [
("3.6", [
('build_only', [XImportant(True)]),
]),
]),
]),
]),
]

View File

@ -31,7 +31,6 @@ class Conf:
is_libtorch: bool = False
is_important: bool = False
parallel_backend: Optional[str] = None
build_only: bool = False
@staticmethod
def is_test_phase(phase):
@ -113,8 +112,6 @@ class Conf:
parameters["resource_class"] = "xlarge"
if hasattr(self, 'filters'):
parameters['filters'] = self.filters
if self.build_only:
parameters['build_only'] = miniutils.quote(str(int(True)))
return parameters
def gen_workflow_job(self, phase):
@ -178,6 +175,35 @@ class DocPushConf(object):
}
}
# TODO Convert these to graph nodes
def gen_dependent_configs(xenial_parent_config):
extra_parms = [
(["multigpu"], "large"),
(["nogpu", "NO_AVX2"], None),
(["nogpu", "NO_AVX"], None),
(["slow"], "medium"),
]
configs = []
for parms, gpu in extra_parms:
c = Conf(
xenial_parent_config.distro,
["py3"] + parms,
pyver=xenial_parent_config.pyver,
cuda_version=xenial_parent_config.cuda_version,
restrict_phases=["test"],
gpu_resource=gpu,
parent_build=xenial_parent_config,
is_important=False,
)
configs.append(c)
return configs
def gen_docs_configs(xenial_parent_config):
configs = []
@ -185,7 +211,7 @@ def gen_docs_configs(xenial_parent_config):
HiddenConf(
"pytorch_python_doc_build",
parent_build=xenial_parent_config,
filters=gen_filter_dict(branches_list=["master", "nightly"],
filters=gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN),
)
)
@ -201,7 +227,7 @@ def gen_docs_configs(xenial_parent_config):
HiddenConf(
"pytorch_cpp_doc_build",
parent_build=xenial_parent_config,
filters=gen_filter_dict(branches_list=["master", "nightly"],
filters=gen_filter_dict(branches_list=r"/.*/",
tags_list=RC_PATTERN),
)
)
@ -212,6 +238,13 @@ def gen_docs_configs(xenial_parent_config):
branch="master",
)
)
configs.append(
HiddenConf(
"pytorch_doc_test",
parent_build=xenial_parent_config
)
)
return configs
@ -336,7 +369,6 @@ def instantiate_configs(only_slow_gradcheck):
is_libtorch=is_libtorch,
is_important=is_important,
parallel_backend=parallel_backend,
build_only=build_only,
)
# run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
@ -357,19 +389,19 @@ def instantiate_configs(only_slow_gradcheck):
tags_list=RC_PATTERN)
c.dependent_tests = gen_docs_configs(c)
if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch and not is_slow_gradcheck:
c.dependent_tests = gen_dependent_configs(c)
if (
compiler_name != "clang"
and not rocm_version
compiler_name == "gcc"
and compiler_version == "5.4"
and not is_libtorch
and not is_vulkan
and not is_pure_torch
and not is_noarch
and not is_slow_gradcheck
and not only_slow_gradcheck
and not build_only
and parallel_backend is None
):
distributed_test = Conf(
c.gen_build_name("") + "distributed",
bc_breaking_check = Conf(
"backward-compatibility-check",
[],
is_xla=False,
restrict_phases=["test"],
@ -377,7 +409,7 @@ def instantiate_configs(only_slow_gradcheck):
is_important=True,
parent_build=c,
)
c.dependent_tests.append(distributed_test)
c.dependent_tests.append(bc_breaking_check)
config_list.append(c)

View File

@ -6,39 +6,35 @@ from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
# TODO: make this generated from a matrix rather than just a static list
IMAGE_NAMES = [
"pytorch-linux-bionic-cuda10.2-cudnn7-py3.8-gcc9",
"pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
"pytorch-linux-bionic-py3.6-clang9",
"pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
"pytorch-linux-bionic-py3.8-gcc9",
"pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7",
"pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7",
"pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
"pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
"pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
"pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
"pytorch-linux-xenial-py3-clang5-asan",
"pytorch-linux-xenial-py3-clang7-asan",
"pytorch-linux-xenial-py3-clang7-onnx",
"pytorch-linux-xenial-py3.8",
"pytorch-linux-xenial-py3.6-clang7",
"pytorch-linux-xenial-py3.6-gcc5.4", # this one is used in doc builds
"pytorch-linux-xenial-py3.6-gcc7.2",
"pytorch-linux-xenial-py3.6-gcc7",
"pytorch-linux-bionic-rocm3.9-py3.6",
"pytorch-linux-bionic-rocm4.0.1-py3.6",
"pytorch-linux-bionic-rocm4.1-py3.6",
"pytorch-linux-bionic-rocm4.2-py3.6",
"pytorch-linux-bionic-rocm4.3.1-py3.6",
]
# This entry should be an element from the list above
# This should contain the image matching the "slow_gradcheck" entry in
# pytorch_build_data.py
SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
def get_workflow_jobs(only_slow_gradcheck=False):
def get_workflow_jobs():
"""Generates a list of docker image build definitions"""
ret = []
for image_name in IMAGE_NAMES:
if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
continue
parameters = OrderedDict({
"name": quote(f"docker-{image_name}"),
"image_name": quote(image_name),

View File

@ -0,0 +1,78 @@
import cimodel.lib.miniutils as miniutils
from cimodel.data.simple.util.versions import MultiPartVersion, CudaVersion
from cimodel.data.simple.util.docker_constants import DOCKER_IMAGE_BASIC, DOCKER_IMAGE_CUDA_10_2
class GeConfigTestJob:
def __init__(self,
py_version,
gcc_version,
cuda_version,
variant_parts,
extra_requires,
use_cuda_docker=False,
build_env_override=None):
self.py_version = py_version
self.gcc_version = gcc_version
self.cuda_version = cuda_version
self.variant_parts = variant_parts
self.extra_requires = extra_requires
self.use_cuda_docker = use_cuda_docker
self.build_env_override = build_env_override
def get_all_parts(self, with_dots):
maybe_py_version = self.py_version.render_dots_or_parts(with_dots) if self.py_version else []
maybe_gcc_version = self.gcc_version.render_dots_or_parts(with_dots) if self.gcc_version else []
maybe_cuda_version = self.cuda_version.render_dots_or_parts(with_dots) if self.cuda_version else []
common_parts = [
"pytorch",
"linux",
"xenial",
] + maybe_cuda_version + maybe_py_version + maybe_gcc_version
return common_parts + self.variant_parts
def gen_tree(self):
resource_class = "gpu.medium" if self.use_cuda_docker else "large"
docker_image = DOCKER_IMAGE_CUDA_10_2 if self.use_cuda_docker else DOCKER_IMAGE_BASIC
full_name = "_".join(self.get_all_parts(False))
build_env = self.build_env_override or "-".join(self.get_all_parts(True))
props_dict = {
"name": full_name,
"build_environment": build_env,
"requires": self.extra_requires,
"resource_class": resource_class,
"docker_image": docker_image,
}
if self.use_cuda_docker:
props_dict["use_cuda_docker_runtime"] = miniutils.quote(str(1))
return [{"pytorch_linux_test": props_dict}]
WORKFLOW_DATA = [
GeConfigTestJob(
MultiPartVersion([3, 6], "py"),
MultiPartVersion([5, 4], "gcc"),
None,
["jit_legacy", "test"],
["pytorch_linux_xenial_py3_6_gcc5_4_build"]),
GeConfigTestJob(
None,
None,
CudaVersion(10, 2),
["cudnn7", "py3", "jit_legacy", "test"],
["pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build"],
use_cuda_docker=True,
),
]
def get_workflow_jobs():
return [item.gen_tree() for item in WORKFLOW_DATA]

View File

@ -1,7 +1,7 @@
from cimodel.data.simple.util.versions import MultiPartVersion
import cimodel.lib.miniutils as miniutils
XCODE_VERSION = MultiPartVersion([12, 5, 1])
XCODE_VERSION = MultiPartVersion([12, 0, 0])
class ArchVariant:

View File

@ -1,5 +1,4 @@
import cimodel.data.simple.ios_definitions as ios_definitions
import cimodel.lib.miniutils as miniutils
class IOSNightlyJob:
@ -44,8 +43,6 @@ class IOSNightlyJob:
props_dict["ios_arch"] = self.variant
props_dict["ios_platform"] = ios_definitions.get_platform(self.variant)
props_dict["name"] = self.gen_job_name()
props_dict["use_metal"] = miniutils.quote(str(int(True)))
props_dict["use_coreml"] = miniutils.quote(str(int(True)))
template_name = "_".join([
"binary",

View File

@ -58,7 +58,7 @@ class WindowsJob:
self.cudnn_version = 8 if self.cuda_version.major == 11 else 7
arch_env_elements = (
["cuda" + str(self.cuda_version.major) + "." + str(self.cuda_version.minor)]
["cuda" + str(self.cuda_version.major), "cudnn" + str(self.cudnn_version)]
if self.cuda_version
else ["cpu"]
)
@ -78,7 +78,6 @@ class WindowsJob:
props_dict = {
"build_environment": build_environment_string,
"python_version": miniutils.quote(python_version),
"vs_version": miniutils.quote("16.8.6"),
"vc_version": miniutils.quote(self.vscode_spec.dotted_version()),
"vc_year": miniutils.quote(str(self.vscode_spec.year)),
"vc_product": self.vscode_spec.get_product(),
@ -146,10 +145,10 @@ class VcSpec:
_VC2019 = VcSpec(2019)
WORKFLOW_DATA = [
# VS2019 CUDA-10.2
WindowsJob(None, _VC2019, CudaVersion(10, 2), master_only=True),
# VS2019 CUDA-10.2 force on cpu
WindowsJob(1, _VC2019, CudaVersion(10, 2), force_on_cpu=True, master_only=True),
# VS2019 CUDA-10.1
WindowsJob(None, _VC2019, CudaVersion(10, 1), master_only=True),
# VS2019 CUDA-10.1 force on cpu
WindowsJob(1, _VC2019, CudaVersion(10, 1), force_on_cpu=True, master_only=True),
# TODO: This test is disabled due to https://github.com/pytorch/pytorch/issues/59724
# WindowsJob('_azure_multi_gpu', _VC2019, CudaVersion(11, 1), multi_gpu=True, master_and_nightly=True),

File diff suppressed because it is too large

View File

@ -27,5 +27,5 @@ Docker builds are now defined with `.circleci/cimodel/data/simple/docker_definit
./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
# Set flags (see build.sh) and build image
sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
sudo bash -c 'BREAKPAD=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
```
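
A hedged aside on the snippet above: `build.sh` reads these feature toggles from the environment, so several can be combined in a single invocation (image name and tag are the README's own placeholders):

```
# Combining the toggles shown in this diff (sketch)
sudo bash -c 'PROTOBUF=1 BREAKPAD=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
```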

View File

@ -78,108 +78,119 @@ TRAVIS_DL_URL_PREFIX="https://s3.amazonaws.com/travis-python-archives/binaries/u
case "$image" in
pytorch-linux-xenial-py3.8)
ANACONDA_PYTHON_VERSION=3.8
CMAKE_VERSION=3.10.3
GCC_VERSION=7
# Do not install PROTOBUF, DB, and VISION as a test
;;
pytorch-linux-xenial-py3.6-gcc5.4)
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
GCC_VERSION=5
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3.6-gcc7.2)
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
GCC_VERSION=7
# Do not install PROTOBUF, DB, and VISION as a test
;;
pytorch-linux-xenial-py3.6-gcc7)
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7)
CUDA_VERSION=10.0
CUDNN_VERSION=7
ANACONDA_PYTHON_VERSION=3.6
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7)
CUDA_VERSION=10.1
CUDNN_VERSION=7
ANACONDA_PYTHON_VERSION=3.6
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7)
CUDA_VERSION=10.2
CUDNN_VERSION=7
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7)
CUDA_VERSION=11.1
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7)
CUDA_VERSION=11.3.0 # Deviating from major.minor to conform to nvidia's Docker image names
CUDNN_VERSION=8
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
GCC_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3-clang5-asan)
ANACONDA_PYTHON_VERSION=3.6
CLANG_VERSION=5.0
CMAKE_VERSION=3.10.3
PROTOBUF=yes
DB=yes
VISION=yes
;;
pytorch-linux-xenial-py3-clang7-asan)
ANACONDA_PYTHON_VERSION=3.6
CLANG_VERSION=7
CMAKE_VERSION=3.10.3
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3-clang7-onnx)
ANACONDA_PYTHON_VERSION=3.6
CLANG_VERSION=7
CMAKE_VERSION=3.10.3
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3-clang5-android-ndk-r19c)
ANACONDA_PYTHON_VERSION=3.6
CLANG_VERSION=5.0
CMAKE_VERSION=3.10.3
LLVMDEV=yes
PROTOBUF=yes
ANDROID=yes
ANDROID_NDK_VERSION=r19c
GRADLE_VERSION=6.8.3
CMAKE_VERSION=3.7.0
NINJA_VERSION=1.9.0
;;
pytorch-linux-xenial-py3.6-clang7)
ANACONDA_PYTHON_VERSION=3.6
CMAKE_VERSION=3.10.3
CLANG_VERSION=7
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-py3.6-clang9)
ANACONDA_PYTHON_VERSION=3.6
@ -187,6 +198,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
VULKAN_SDK_VERSION=1.2.162.1
SWIFTSHADER=yes
;;
@ -196,6 +208,8 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9)
CUDA_VERSION=10.2
@ -205,6 +219,17 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda10.2-cudnn7-py3.8-gcc9)
CUDA_VERSION=10.2
CUDNN_VERSION=7
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7)
CUDA_VERSION=10.2
@ -214,6 +239,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda11.0-cudnn8-py3.6-gcc9)
CUDA_VERSION=11.0
@ -223,14 +249,25 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=3.9
;;
pytorch-linux-bionic-rocm4.0.1-py3.6)
ANACONDA_PYTHON_VERSION=3.6
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=4.0.1
;;
pytorch-linux-bionic-rocm4.1-py3.6)
ANACONDA_PYTHON_VERSION=3.6
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=4.1
;;
pytorch-linux-bionic-rocm4.2-py3.6)
@ -239,25 +276,16 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=4.2
;;
pytorch-linux-bionic-rocm4.3.1-py3.6)
ANACONDA_PYTHON_VERSION=3.6
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=4.3.1
;;
*)
# Catch-all for builds that are not hardcoded.
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
echo "image '$image' did not match an existing build configuration"
if [[ "$image" == *xenial* ]]; then
CMAKE_VERSION=3.10.3
fi
if [[ "$image" == *py* ]]; then
extract_version_from_image_name py ANACONDA_PYTHON_VERSION
fi
@ -292,7 +320,7 @@ if [ -n "${JENKINS:-}" ]; then
JENKINS_GID=$(id -g jenkins)
fi
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
tmp_tag="tmp-$(cat /dev/urandom | tr -dc 'a-z' | head -c 32)"
# Build image
# TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
@ -320,6 +348,7 @@ docker build \
--build-arg "GCC_VERSION=${GCC_VERSION}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
--build-arg "BREAKPAD=${BREAKPAD}" \
--build-arg "ANDROID=${ANDROID}" \
--build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \

View File

@ -0,0 +1,25 @@
#!/bin/bash
set -ex
git clone https://github.com/driazati/breakpad.git
pushd breakpad
# breakpad has no actual releases, so this is pinned to the top commit from
# main when this was forked (including the one patch commit). This uses a fork
# of the breakpad mainline that automatically daisy-chains out to any previously
# installed signal handlers (instead of overwriting them).
git checkout 5485e473ed46d065e05489e50dfc59d90dfd7e22
git clone https://chromium.googlesource.com/linux-syscall-support src/third_party/lss
pushd src/third_party/lss
# same as with breakpad, there are no real releases for this repo so use a
# commit as the pin
git checkout e1e7b0ad8ee99a875b272c8e33e308472e897660
popd
./configure
make
make install
popd
rm -rf breakpad

View File

@ -4,9 +4,6 @@ set -ex
[ -n "$CMAKE_VERSION" ]
# Remove system cmake install so it won't get used instead
apt-get remove cmake -y
# Turn 3.6.3 into v3.6
path=$(echo "${CMAKE_VERSION}" | sed -e 's/\([0-9].[0-9]\+\).*/v\1/')
file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz"

View File

@ -69,8 +69,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
}
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
# DO NOT install cmake here as it would install a version newer than 3.10, but
# we want to pin to version 3.10.
# DO NOT install cmake here as it would install a version newer than 3.5, but
# we want to pin to version 3.5.
SCIPY_VERSION=1.1.0
if [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
# Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
@ -86,7 +86,11 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
conda_install numpy=1.18.5 astunparse pyyaml mkl mkl-include setuptools cffi future six dataclasses typing_extensions
fi
if [[ "$CUDA_VERSION" == 10.2* ]]; then
if [[ "$CUDA_VERSION" == 10.0* ]]; then
conda_install magma-cuda100 -c pytorch
elif [[ "$CUDA_VERSION" == 10.1* ]]; then
conda_install magma-cuda101 -c pytorch
elif [[ "$CUDA_VERSION" == 10.2* ]]; then
conda_install magma-cuda102 -c pytorch
elif [[ "$CUDA_VERSION" == 11.0* ]]; then
conda_install magma-cuda110 -c pytorch
@ -112,7 +116,6 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
boto3==1.16.34 \
coverage==5.5 \
hypothesis==4.53.2 \
expecttest==0.1.3 \
mypy==0.812 \
tb-nightly

View File

@ -2,6 +2,23 @@
set -ex
# This function installs protobuf 2.6
install_protobuf_26() {
pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
# else it will fail with
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"
curl -LO "https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz"
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-2.6.1.tar.gz
pushd "$pb_dir" && ./configure && make && make check && sudo make install && sudo ldconfig
popd
rm -rf $pb_dir
}
install_ubuntu() {
apt-get update
apt-get install -y --no-install-recommends \

View File

@ -0,0 +1,4 @@
#!/bin/bash
sudo apt-get -qq update
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages libnccl-dev=2.5.6-1+cuda10.1 libnccl2=2.5.6-1+cuda10.1
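
Related note (standard apt behavior, assumed rather than shown here): the `--allow-change-held-packages` flag suggests these NCCL packages are version-held, and a hold is what keeps a pin like this from being replaced on a later upgrade:

```
# Pin-and-hold pattern for the packages installed above (sketch)
sudo apt-mark hold libnccl2 libnccl-dev   # freeze the 2.5.6-1+cuda10.1 builds
apt-mark showhold                         # verify the holds
```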

View File

@ -1,10 +1,4 @@
#!/bin/bash
sudo apt-get update
# also install ssh to avoid error of:
# --------------------------------------------------------------------------
# The value of the MCA parameter "plm_rsh_agent" was set to a path
# that could not be found:
# plm_rsh_agent: ssh : rsh
sudo apt-get install -y ssh
sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev

View File

@ -2,8 +2,8 @@
set -ex
# This function installs protobuf 3.17
install_protobuf_317() {
# This function installs protobuf 2.6
install_protobuf_26() {
pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir
@ -12,32 +12,37 @@ install_protobuf_317() {
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"
curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz"
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
# -j2 to balance memory usage and speed.
# naked `-j` seems to use too much memory.
pushd "$pb_dir" && ./configure && make -j2 && make -j2 check && sudo make -j2 install && sudo ldconfig
curl -LO "https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz"
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-2.6.1.tar.gz
pushd "$pb_dir" && ./configure && make && make check && sudo make install && sudo ldconfig
popd
rm -rf $pb_dir
}
install_ubuntu() {
# Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
# Ubuntu 14.04 ships with protobuf 2.5, but ONNX needs protobuf >= 2.6
# so we install that here if on 14.04
# Ubuntu 14.04 also has cmake 2.8.12 as the default option, so we will
# install cmake3 here and use cmake3.
apt-get update
if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
apt-get install -y --no-install-recommends cmake3
install_protobuf_26
else
apt-get install -y --no-install-recommends \
libprotobuf-dev \
protobuf-compiler
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
install_protobuf_317
}
install_centos() {
install_protobuf_317
# Centos7 ships with protobuf 2.5, but ONNX needs protobuf >= 2.6
# so we always install that here
install_protobuf_26
}
# Install base packages depending on the base OS

View File

@ -4,13 +4,9 @@ set -ex
install_magma() {
# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git -b magma_ctrl_launch_bounds
git clone https://bitbucket.org/icl/magma.git
pushd magma
# The branch "magma_ctrl_launch_bounds" is having a fix over the below commit, so keeping the below comment for reference.
#git checkout 878b1ce02e9cfe4a829be22c8f911e9c0b6bd88f
# Work around non-ascii characters in certain magma sources; remove this after upstream magma fixes this.
perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zfree.cpp
perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zsolverinfo.cpp
git checkout 878b1ce02e9cfe4a829be22c8f911e9c0b6bd88f
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
@ -19,7 +15,7 @@ install_magma() {
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
export PATH="${PATH}:/opt/rocm/bin"
make -f make.gen.hipMAGMA -j $(nproc)
make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
make testing/testing_dgemm -j $(nproc) MKLROOT=/opt/conda
popd
mv magma /opt/rocm

View File

@ -2,6 +2,23 @@
set -ex
# This function installs protobuf 2.6
install_protobuf_26() {
pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
# else it will fail with
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"
curl -LO "https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz"
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-2.6.1.tar.gz
pushd "$pb_dir" && ./configure && make && make check && sudo make install && sudo ldconfig
popd
rm -rf $pb_dir
}
install_ubuntu() {
apt-get update
apt-get install -y --no-install-recommends \

View File

@ -61,16 +61,6 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh
ENV INSTALLED_VISION ${VISION}
ADD ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
ADD ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh
# Install ccache/sccache (do this last, so we get priority in PATH)
ADD ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
@ -82,6 +72,11 @@ ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
# Install NCCL for when CUDA is version 10.1
ADD ./common/install_nccl.sh install_nccl.sh
RUN if [ "${CUDA_VERSION}" = 10.1 ]; then bash ./install_nccl.sh; fi
RUN rm install_nccl.sh
# Install Open MPI for CUDA
ADD ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
@ -98,5 +93,9 @@ ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
ADD ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh
USER jenkins
CMD ["bash"]

View File

@ -82,6 +82,13 @@ RUN rm AndroidManifest.xml
RUN rm build.gradle
ENV INSTALLED_ANDROID ${ANDROID}
# (optional) Install breakpad
ARG BREAKPAD
ADD ./common/install_breakpad.sh install_breakpad.sh
RUN if [ -n "${BREAKPAD}" ]; then bash ./install_breakpad.sh; fi
RUN rm install_breakpad.sh
ENV INSTALLED_BREAKPAD ${BREAKPAD}
# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
ADD ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
@ -106,10 +113,6 @@ ADD ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
# Install ccache/sccache (do this last, so we get priority in PATH)
ADD ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
@ -127,5 +130,9 @@ ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
USER jenkins
CMD ["bash"]

View File

@ -13,8 +13,10 @@ from collections import namedtuple
import cimodel.data.binary_build_definitions as binary_build_definitions
import cimodel.data.pytorch_build_definitions as pytorch_build_definitions
import cimodel.data.simple.android_definitions
import cimodel.data.simple.bazel_definitions
import cimodel.data.simple.binary_smoketest
import cimodel.data.simple.docker_definitions
import cimodel.data.simple.ge_config_tests
import cimodel.data.simple.ios_definitions
import cimodel.data.simple.macos_definitions
import cimodel.data.simple.mobile_definitions
@ -133,6 +135,8 @@ def gen_build_workflows_tree():
cimodel.data.simple.android_definitions.get_workflow_jobs,
cimodel.data.simple.ios_definitions.get_workflow_jobs,
cimodel.data.simple.mobile_definitions.get_workflow_jobs,
cimodel.data.simple.ge_config_tests.get_workflow_jobs,
cimodel.data.simple.bazel_definitions.get_workflow_jobs,
cimodel.data.simple.binary_smoketest.get_workflow_jobs,
cimodel.data.simple.nightly_ios.get_workflow_jobs,
cimodel.data.simple.nightly_android.get_workflow_jobs,
@ -150,10 +154,7 @@ def gen_build_workflows_tree():
binary_build_definitions.get_nightly_uploads,
]
slow_gradcheck_jobs = [
pytorch_build_definitions.get_workflow_jobs,
cimodel.data.simple.docker_definitions.get_workflow_jobs,
]
slow_gradcheck_jobs = pytorch_build_definitions.get_workflow_jobs(only_slow_gradcheck=True)
return {
"workflows": {
@ -171,7 +172,7 @@ def gen_build_workflows_tree():
},
"slow_gradcheck_build": {
"when": r"<< pipeline.parameters.run_slow_gradcheck_build >>",
"jobs": [f(only_slow_gradcheck=True) for f in slow_gradcheck_jobs],
"jobs": slow_gradcheck_jobs,
},
}
}

View File

@ -55,13 +55,13 @@ else
echo "Can't tell what to checkout"
exit 1
fi
retry git submodule update --init --recursive --jobs 0
retry git submodule update --init --recursive
echo "Using Pytorch from "
git --no-pager log --max-count 1
popd
# Clone the Builder master repo
retry git clone -q https://github.com/pytorch/builder.git -b release/1.10 "$BUILDER_ROOT"
retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
pushd "$BUILDER_ROOT"
echo "Using builder from "
git --no-pager log --max-count 1

View File

@ -22,7 +22,7 @@ export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
# sync submodules
cd ${PROJ_ROOT}
git submodule sync
git submodule update --init --recursive --jobs 0
git submodule update --init --recursive
# run build script
chmod a+x ${PROJ_ROOT}/scripts/build_ios.sh
@ -31,12 +31,8 @@ cat ${PROJ_ROOT}/scripts/build_ios.sh
echo "########################################################"
echo "IOS_ARCH: ${IOS_ARCH}"
echo "IOS_PLATFORM: ${IOS_PLATFORM}"
echo "USE_PYTORCH_METAL: ${USE_PYTORCH_METAL}"
echo "USE_COREML_DELEGATE: ${USE_COREML_DELEGATE}"
export IOS_ARCH=${IOS_ARCH}
export IOS_PLATFORM=${IOS_PLATFORM}
export USE_PYTORCH_METAL=${USE_PYTORCH_METAL}
export USE_COREML_DELEGATE=${USE_COREML_DELEGATE}
unbuffer ${PROJ_ROOT}/scripts/build_ios.sh 2>&1 | ts
#store the binary

View File

@ -8,17 +8,16 @@ cd ${PROJ_ROOT}/ios/TestApp
# install fastlane
sudo gem install bundler && bundle install
# install certificates
echo "${IOS_CERT_KEY_2022}" >> cert.txt
echo "${IOS_CERT_KEY}" >> cert.txt
base64 --decode cert.txt -o Certificates.p12
rm cert.txt
bundle exec fastlane install_root_cert
bundle exec fastlane install_dev_cert
bundle exec fastlane install_cert
# install the provisioning profile
PROFILE=PyTorch_CI_2022.mobileprovision
PROFILE=PyTorch_CI_2021.mobileprovision
PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
mkdir -pv "${PROVISIONING_PROFILES}"
cd "${PROVISIONING_PROFILES}"
echo "${IOS_SIGN_KEY_2022}" >> cert.txt
echo "${IOS_SIGN_KEY}" >> cert.txt
base64 --decode cert.txt -o ${PROFILE}
rm cert.txt
# run the ruby build script
@ -26,5 +25,5 @@ if ! [ -x "$(command -v xcodebuild)" ]; then
echo 'Error: xcodebuild is not installed.'
exit 1
fi
PROFILE=PyTorch_CI_2022
ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID} -f Accelerate,MetalPerformanceShaders,CoreML
PROFILE=PyTorch_CI_2021
ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}

View File

@ -27,14 +27,11 @@ lipo -i ${ZIP_DIR}/install/lib/*.a
cp ${PROJ_ROOT}/ios/LibTorch-Lite.h ${ZIP_DIR}/src/
cp ${PROJ_ROOT}/LICENSE ${ZIP_DIR}/
# zip the library
export DATE="$(date -u +%Y%m%d)"
export IOS_NIGHTLY_BUILD_VERSION="1.10.0.${DATE}"
# libtorch_lite_ios_nightly_1.10.0.20210810.zip
ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
ZIPFILE=libtorch_ios_nightly_build.zip
cd ${ZIP_DIR}
#for testing
touch version.txt
echo "${IOS_NIGHTLY_BUILD_VERSION}" > version.txt
echo $(date +%s) > version.txt
zip -r ${ZIPFILE} install src version.txt LICENSE
# upload to aws
# Install conda then 'conda install' awscli
@ -51,14 +48,3 @@ set +x
# echo "AWS KEY: ${AWS_ACCESS_KEY_ID}"
# echo "AWS SECRET: ${AWS_SECRET_ACCESS_KEY}"
aws s3 cp ${ZIPFILE} s3://ossci-ios-build/ --acl public-read
# create a new LibTorch-Lite-Nightly.podspec from the template
echo "cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec"
cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
# update pod version
sed -i '' -e "s/IOS_NIGHTLY_BUILD_VERSION/${IOS_NIGHTLY_BUILD_VERSION}/g" ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
cat ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
# push the new LibTorch-Lite-Nightly.podspec to CocoaPods
pod trunk push --verbose --allow-warnings --use-libraries --skip-import-validation ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec

View File

@ -9,6 +9,10 @@ python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"
# Set up Python
if [[ "$PACKAGE_TYPE" == conda ]]; then
# There was a bug that was introduced in conda-package-handling >= 1.6.1 that makes archives
# above a certain size fail out when attempting to extract
# see: https://github.com/conda/conda-package-handling/issues/71
conda install -y conda-package-handling=1.6.0
retry conda create -qyn testenv python="$DESIRED_PYTHON"
source activate testenv >/dev/null
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then

View File

@ -14,10 +14,6 @@ chmod +x "$build_script"
# Build
cat >"$build_script" <<EOL
export PATH="$workdir/miniconda/bin:$PATH"
if [[ "$CIRCLE_BRANCH" == "nightly" ]]; then
export USE_PYTORCH_METAL_EXPORT=1
export USE_COREML_DELEGATE=1
fi
if [[ "$PACKAGE_TYPE" == conda ]]; then
"$workdir/builder/conda/build_pytorch.sh"
else

View File

@ -62,7 +62,7 @@ if [[ -z "$DOCKER_IMAGE" ]]; then
if [[ "$PACKAGE_TYPE" == conda ]]; then
export DOCKER_IMAGE="pytorch/conda-cuda"
elif [[ "$DESIRED_CUDA" == cpu ]]; then
export DOCKER_IMAGE="pytorch/manylinux-cpu"
export DOCKER_IMAGE="pytorch/manylinux-cuda100"
else
export DOCKER_IMAGE="pytorch/manylinux-cuda${DESIRED_CUDA:2}"
fi

View File

@ -8,45 +8,15 @@ export CUDA_VERSION="${DESIRED_CUDA/cu/}"
export USE_SCCACHE=1
export SCCACHE_BUCKET=ossci-compiler-cache-windows
export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
export VC_YEAR=2019
if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
export BUILD_SPLIT_CUDA="ON"
if [[ "$CUDA_VERSION" == "92" || "$CUDA_VERSION" == "100" ]]; then
export VC_YEAR=2017
else
export VC_YEAR=2019
fi
echo "Free Space for CUDA DEBUG BUILD"
if [[ "$CIRCLECI" == 'true' ]]; then
if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community" ]]; then
rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community"
fi
if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0" ]]; then
rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0"
fi
if [[ -d "C:\\Program Files (x86)\\Microsoft.NET" ]]; then
rm -rf "C:\\Program Files (x86)\\Microsoft.NET"
fi
if [[ -d "C:\\Program Files\\dotnet" ]]; then
rm -rf "C:\\Program Files\\dotnet"
fi
if [[ -d "C:\\Program Files (x86)\\dotnet" ]]; then
rm -rf "C:\\Program Files (x86)\\dotnet"
fi
if [[ -d "C:\\Program Files (x86)\\Microsoft SQL Server" ]]; then
rm -rf "C:\\Program Files (x86)\\Microsoft SQL Server"
fi
if [[ -d "C:\\Program Files (x86)\\Xamarin" ]]; then
rm -rf "C:\\Program Files (x86)\\Xamarin"
fi
if [[ -d "C:\\Program Files (x86)\\Google" ]]; then
rm -rf "C:\\Program Files (x86)\\Google"
fi
if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
export BUILD_SPLIT_CUDA="ON"
fi
set +x
@ -62,8 +32,7 @@ if [[ "$CIRCLECI" == 'true' && -d "C:\\ProgramData\\Microsoft\\VisualStudio\\Pac
fi
if [[ "$CIRCLECI" == 'true' && -d "C:\\Microsoft" ]]; then
# don't use quotes here
rm -rf /c/Microsoft/AndroidNDK*
rm -rf "C:\\Microsoft\\Android*"
fi
echo "Free space on filesystem before build:"

View File

@ -4,7 +4,13 @@ set -eux -o pipefail
source "/c/w/env"
export CUDA_VERSION="${DESIRED_CUDA/cu/}"
export VC_YEAR=2019
export VC_YEAR=2017
if [[ "$CUDA_VERSION" == "92" || "$CUDA_VERSION" == "100" ]]; then
export VC_YEAR=2017
else
export VC_YEAR=2019
fi
pushd "$BUILDER_ROOT"

View File

@ -10,27 +10,18 @@ pt_checkout="/var/lib/jenkins/workspace"
# Since we're cat-ing this file, we need to escape all $'s
echo "cpp_doc_push_script.sh: Invoked with $*"
# for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
# the order of operations goes:
# 1. Check if there's an argument $1
# 2. If no argument check for environment var DOCS_INSTALL_PATH
# 3. If no environment var fall back to default 'docs/'
# NOTE: It might seem weird to gather the second argument before gathering the first argument
# but since DOCS_INSTALL_PATH can be derived from DOCS_VERSION it's probably better to
# try and gather it first, just so we don't potentially break people who rely on this script
# Argument 2: What version of the Python API docs we are building.
version="${2:-${DOCS_VERSION:-master}}"
if [ -z "$version" ]; then
echo "error: cpp_doc_push_script.sh: version (arg2) not specified"
# Argument 1: Where to copy the built documentation for Python API to
# (pytorch.github.io/$install_path)
install_path="$1"
if [ -z "$install_path" ]; then
echo "error: cpp_doc_push_script.sh: install_path (arg1) not specified"
exit 1
fi
# Argument 1: Where to copy the built documentation for Python API to
# (pytorch.github.io/$install_path)
install_path="${1:-${DOCS_INSTALL_PATH:-docs/${DOCS_VERSION}}}"
if [ -z "$install_path" ]; then
echo "error: cpp_doc_push_script.sh: install_path (arg1) not specified"
# Argument 2: What version of the Python API docs we are building.
version="$2"
if [ -z "$version" ]; then
echo "error: cpp_doc_push_script.sh: version (arg2) not specified"
exit 1
fi
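
The comments in the hunk above describe the `${1:-${DOCS_INSTALL_PATH:-docs/}}` fallback chain; a minimal, self-contained sketch of that expansion order (names reused from the script, values hypothetical):

```
#!/bin/bash
# ${1:-${DOCS_INSTALL_PATH:-docs/}} resolves: argument, then env var, then default
DOCS_INSTALL_PATH="from-env"
demo() { echo "${1:-${DOCS_INSTALL_PATH:-docs/}}"; }
demo "from-arg"   # prints: from-arg
demo              # prints: from-env
unset DOCS_INSTALL_PATH
demo              # prints: docs/
```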

View File

@ -13,27 +13,18 @@ echo "python_doc_push_script.sh: Invoked with $*"
set -ex
# for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
# the order of operations goes:
# 1. Check if there's an argument $1
# 2. If no argument check for environment var DOCS_INSTALL_PATH
# 3. If no environment var fall back to default 'docs/'
# NOTE: It might seem weird to gather the second argument before gathering the first argument
# but since DOCS_INSTALL_PATH can be derived from DOCS_VERSION it's probably better to
# try and gather it first, just so we don't potentially break people who rely on this script
# Argument 2: What version of the docs we are building.
version="${2:-${DOCS_VERSION:-master}}"
if [ -z "$version" ]; then
echo "error: python_doc_push_script.sh: version (arg2) not specified"
# Argument 1: Where to copy the built documentation to
# (pytorch.github.io/$install_path)
install_path="$1"
if [ -z "$install_path" ]; then
echo "error: python_doc_push_script.sh: install_path (arg1) not specified"
exit 1
fi
# Argument 1: Where to copy the built documentation to
# (pytorch.github.io/$install_path)
install_path="${1:-${DOCS_INSTALL_PATH:-docs/${DOCS_VERSION}}}"
if [ -z "$install_path" ]; then
echo "error: python_doc_push_script.sh: install_path (arg1) not specified"
# Argument 2: What version of the docs we are building.
version="$2"
if [ -z "$version" ]; then
echo "error: python_doc_push_script.sh: version (arg2) not specified"
exit 1
fi
@ -43,7 +34,7 @@ if [ "$version" == "master" ]; then
fi
# Argument 3: The branch to push to. Usually is "site"
branch="${3:-${DOCS_BRANCH:-site}}"
branch="$3"
if [ -z "$branch" ]; then
echo "error: python_doc_push_script.sh: branch (arg3) not specified"
exit 1

View File

@ -7,9 +7,6 @@ sudo rm -f /etc/apt/heroku.list
sudo rm -f /etc/apt/openjdk-r-ubuntu-ppa-xenial.list
sudo rm -f /etc/apt/partner.list
# To increase the network reliability, let apt decide which mirror is best to use
sudo sed -i -e 's/http:\/\/.*archive/mirror:\/\/mirrors/' -e 's/\/ubuntu\//\/mirrors.txt/' /etc/apt/sources.list
retry () {
$* || $* || $* || $* || $*
}
@ -43,9 +40,9 @@ if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
retry sudo apt-get update -qq
sudo apt-get update -qq
# Necessary to get the `--gpus` flag to function within docker
retry sudo apt-get install -y nvidia-container-toolkit
sudo apt-get install -y nvidia-container-toolkit
sudo systemctl restart docker
else
# Explicitly remove nvidia docker apt repositories if not building for cuda
@ -67,7 +64,6 @@ add_to_env_file() {
}
add_to_env_file IN_CI 1
add_to_env_file CI_MASTER "${CI_MASTER:-}"
add_to_env_file COMMIT_SOURCE "${CIRCLE_BRANCH:-}"
add_to_env_file BUILD_ENVIRONMENT "${BUILD_ENVIRONMENT}"
add_to_env_file CIRCLE_PULL_REQUEST "${CIRCLE_PULL_REQUEST}"

View File

@ -9,16 +9,10 @@ import sys
import time
import zipfile
from typing import Any, Dict, Generator, List
from tools.stats.scribe import (
send_to_scribe,
rds_write,
register_rds_schema,
schema_from_sample,
)
import requests
def get_size(file_dir: str) -> int:
def get_size(file_dir):
try:
# we should only expect one file; if not, something is wrong
file_name = glob.glob(os.path.join(file_dir, "*"))[0]
@ -28,21 +22,15 @@ def get_size(file_dir: str) -> int:
return 0
def base_data() -> Dict[str, Any]:
return {
"run_duration_seconds": int(
time.time() - os.path.getmtime(os.path.realpath(__file__))
),
}
def build_message(size: int) -> Dict[str, Any]:
build_env_split: List[Any] = os.environ.get("BUILD_ENVIRONMENT", "").split()
pkg_type, py_ver, cu_ver, *_ = build_env_split + [None, None, None]
def build_message(size):
pkg_type, py_ver, cu_ver, *_ = os.environ.get("BUILD_ENVIRONMENT", "").split() + [
None,
None,
None,
]
os_name = os.uname()[0].lower()
if os_name == "darwin":
os_name = "macos"
return {
"normal": {
"os": os_name,
@ -59,30 +47,38 @@ def build_message(size: int) -> Dict[str, Any]:
"time": int(time.time()),
"size": size,
"commit_time": int(os.environ.get("COMMIT_TIME", "0")),
"run_duration": int(
time.time() - os.path.getmtime(os.path.realpath(__file__))
),
"run_duration": int(time.time() - os.path.getmtime(os.path.realpath(__file__))),
},
}
def send_message(messages: List[Dict[str, Any]]) -> None:
logs = json.dumps(
[
{
"category": "perfpipe_pytorch_binary_size",
"message": json.dumps(message),
"line_escape": False,
}
for message in messages
]
def send_message(messages):
access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")
if not access_token:
raise ValueError("Can't find access token from environment variable")
url = "https://graph.facebook.com/scribe_logs"
r = requests.post(
url,
data={
"access_token": access_token,
"logs": json.dumps(
[
{
"category": "perfpipe_pytorch_binary_size",
"message": json.dumps(message),
"line_escape": False,
}
for message in messages
]
),
},
)
res = send_to_scribe(logs)
print(res)
print(r.text)
r.raise_for_status()
def report_android_sizes(file_dir: str) -> None:
def gen_sizes() -> Generator[List[Any], None, None]:
def report_android_sizes(file_dir):
def gen_sizes():
# we should only expect one file; if not, something is wrong
aar_files = list(pathlib.Path(file_dir).rglob("pytorch_android-*.aar"))
if len(aar_files) != 1:
@ -105,7 +101,7 @@ def report_android_sizes(file_dir: str) -> None:
# report whole package size
yield ["aar", aar_file.name, os.stat(aar_file).st_size, 0]
def gen_messages() -> Generator[Dict[str, Any], None, None]:
def gen_messages():
android_build_type = os.environ.get("ANDROID_BUILD_TYPE")
for arch, lib, comp_size, uncomp_size in gen_sizes():
print(android_build_type, arch, lib, comp_size, uncomp_size)
@ -125,9 +121,7 @@ def report_android_sizes(file_dir: str) -> None:
"int": {
"time": int(time.time()),
"commit_time": int(os.environ.get("COMMIT_TIME", "0")),
"run_duration": int(
time.time() - os.path.getmtime(os.path.realpath(__file__))
),
"run_duration": int(time.time() - os.path.getmtime(os.path.realpath(__file__))),
"size": comp_size,
"raw_size": uncomp_size,
},
@ -142,42 +136,14 @@ if __name__ == "__main__":
)
if len(sys.argv) == 2:
file_dir = sys.argv[1]
if os.getenv("IS_GHA", "0") == "1":
sample_lib = {
"library": "abcd",
"size": 1234,
}
sample_data = {
**base_data(),
**sample_lib,
}
register_rds_schema("binary_size", schema_from_sample(sample_data))
print("checking dir: " + file_dir)
if "-android" in os.environ.get("BUILD_ENVIRONMENT", ""):
report_android_sizes(file_dir)
else:
if os.getenv("IS_GHA", "0") == "1":
build_path = pathlib.Path("build") / "lib"
libraries = [
(path.name, os.stat(path).st_size) for path in build_path.glob("*")
]
data = []
for name, size in libraries:
if name.strip() == "":
continue
library_data = {
"library": name,
"size": size,
}
data.append({**base_data(), **library_data})
rds_write("binary_size", data)
print(json.dumps(data, indent=2))
else:
print("checking dir: " + file_dir)
size = get_size(file_dir)
# Sending the message anyway if no size info is collected.
try:
send_message([build_message(size)])
except Exception:
logging.exception("can't send message")
size = get_size(file_dir)
# Sending the message anyway if no size info is collected.
try:
send_message([build_message(size)])
except Exception:
logging.exception("can't send message")

View File

@ -1,8 +1,8 @@
# https://developercommunity.visualstudio.com/t/install-specific-version-of-vs-component/1142479
# Where to find the links: https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers
# https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers
# BuildTools from S3
$VS_DOWNLOAD_LINK = "https://s3.amazonaws.com/ossci-windows/vs${env:VS_VERSION}_BuildTools.exe"
# 16.8.5 BuildTools
$VS_DOWNLOAD_LINK = "https://download.visualstudio.microsoft.com/download/pr/20130c62-1bc8-43d6-b4f0-c20bb7c79113/145a319d79a83376915d8f855605e152ef5f6fa2b2f1d2dca411fb03722eea72/vs_BuildTools.exe"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.Component.MSBuild",
@ -18,41 +18,32 @@ if (${env:INSTALL_WINDOWS_SDK} -eq "1") {
$VS_INSTALL_ARGS += "--add Microsoft.VisualStudio.Component.Windows10SDK.19041"
}
if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe") {
$VS_VERSION_major = [int] ${env:VS_VERSION}.split(".")[0]
$existingPath = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -version "[${env:VS_VERSION}, ${env:VS_VERSION_major + 1})" -property installationPath
if (($existingPath -ne $null) -and (!${env:CIRCLECI})) {
echo "Found correctly versioned existing BuildTools installation in $existingPath"
exit 0
}
$pathToRemove = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -property installationPath
}
echo "Downloading VS installer from S3."
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2019 Version ${env:VS_VERSION} installer failed"
echo "Download of the VS 2019 Version 16.8.5 installer failed"
exit 1
}
if ($pathToRemove -ne $null) {
echo "Uninstalling $pathToRemove."
$VS_UNINSTALL_ARGS = @("uninstall", "--installPath", "`"$pathToRemove`"", "--quiet","--wait")
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_UNINSTALL_ARGS -NoNewWindow -Wait -PassThru
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "Original BuildTools uninstall failed with code $exitCode"
exit 1
if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe") {
$existingPath = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -version "[16, 17)" -property installationPath
if ($existingPath -ne $null) {
echo "Found existing BuildTools installation in $existingPath"
$VS_UNINSTALL_ARGS = @("uninstall", "--installPath", "`"$existingPath`"", "--quiet","--wait")
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_UNINSTALL_ARGS -NoNewWindow -Wait -PassThru
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "Original BuildTools uninstall failed with code $exitCode"
exit 1
}
echo "Original BuildTools uninstalled"
}
echo "Other versioned BuildTools uninstalled."
}
echo "Installing Visual Studio version ${env:VS_VERSION}."
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]."
echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS Collect tool failed."
@ -60,6 +51,6 @@ if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
}
Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
Copy-Item -Path "${env:TEMP}\vslogs.zip" -Destination "C:\w\build-results\"
Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
exit 1
}

View File

@ -1,74 +1,70 @@
#!/bin/bash
set -eux -o pipefail
case ${CUDA_VERSION} in
10.1)
cuda_installer_name="cuda_10.1.243_426.00_win10"
cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
;;
10.2)
cuda_installer_name="cuda_10.2.89_441.22_win10"
cuda_install_packages="nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
;;
11.1)
cuda_major_version=${CUDA_VERSION%.*}
if [[ "$cuda_major_version" == "10" ]]; then
cuda_installer_name="cuda_10.1.243_426.00_win10"
msbuild_project_dir="CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
elif [[ "$cuda_major_version" == "11" ]]; then
if [[ "${CUDA_VERSION}" == "11.1" ]]; then
cuda_installer_name="cuda_11.1.0_456.43_win10"
msbuild_project_dir="visual_studio_integration/CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
cuda_install_packages="nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
;;
11.3)
elif [[ "${CUDA_VERSION}" == "11.3" ]]; then
cuda_installer_name="cuda_11.3.0_465.89_win10"
msbuild_project_dir="visual_studio_integration/CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
cuda_install_packages="thrust_11.3 nvcc_11.3 cuobjdump_11.3 nvprune_11.3 nvprof_11.3 cupti_11.3 cublas_11.3 cublas_dev_11.3 cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3"
;;
*)
echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
else
echo "This should not happen! ABORT."
exit 1
;;
esac
if [[ -f "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/bin/nvcc.exe" ]]; then
echo "Existing CUDA v${CUDA_VERSION} installation found, skipping install"
fi
else
tmp_dir=$(mktemp -d)
(
# no need to popd afterwards; the subshell doesn't affect the parent shell
pushd "${tmp_dir}"
cuda_installer_link="https://ossci-windows.s3.amazonaws.com/${cuda_installer_name}.exe"
curl --retry 3 -kLO $cuda_installer_link
7z x ${cuda_installer_name}.exe -o${cuda_installer_name}
pushd ${cuda_installer_name}
mkdir cuda_install_logs
set +e
# This breaks for some reason if you quote cuda_install_packages
# shellcheck disable=SC2086
./setup.exe -s ${cuda_install_packages} -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
set -e
if [[ ! -f "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/bin/nvcc.exe" ]]; then
echo "CUDA installation failed"
mkdir -p /c/w/build-results
7z a "c:\\w\\build-results\\cuda_install_logs.7z" cuda_install_logs
exit 1
fi
)
rm -rf "${tmp_dir}"
echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
exit 1
fi
if [[ -f "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64/nvToolsExt64_1.dll" ]]; then
echo "Existing nvtools installation found, skipping install"
else
# create tmp dir for download
tmp_dir=$(mktemp -d)
(
# no need to popd afterwards; the subshell doesn't affect the parent shell
pushd "${tmp_dir}"
curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
7z x NvToolsExt.7z -oNvToolsExt
mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
)
rm -rf "${tmp_dir}"
if [[ "$cuda_major_version" == "11" && "${JOB_EXECUTOR:-}" == "windows-with-nvidia-gpu" ]]; then
cuda_install_packages="${cuda_install_packages} Display.Driver"
fi
cuda_installer_link="https://ossci-windows.s3.amazonaws.com/${cuda_installer_name}.exe"
curl --retry 3 -kLO $cuda_installer_link
7z x ${cuda_installer_name}.exe -o${cuda_installer_name}
cd ${cuda_installer_name}
mkdir cuda_install_logs
set +e
./setup.exe -s ${cuda_install_packages} -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
set -e
if [[ "${VC_YEAR}" == "2017" ]]; then
cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2017/${VC_PRODUCT}/Common7/IDE/VC/VCTargets/BuildCustomizations/"
else
cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2019/${VC_PRODUCT}/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
fi
if ! ls "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64/nvToolsExt64_1.dll"
then
curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
7z x NvToolsExt.7z -oNvToolsExt
mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
export NVTOOLSEXT_PATH="C:\\Program Files\\NVIDIA Corporation\\NvToolsExt\\"
fi
if ! ls "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/bin/nvcc.exe"
then
echo "CUDA installation failed"
mkdir -p /c/w/build-results
7z a "c:\\w\\build-results\\cuda_install_logs.7z" cuda_install_logs
exit 1
fi
cd ..
rm -rf ./${cuda_installer_name}
rm -f ./${cuda_installer_name}.exe
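One variant above runs its download/unpack steps through pushd inside a ( ... ) subshell and notes that no popd is needed. A minimal sketch of why that works: directory changes die with the subshell.

tmp_dir=$(mktemp -d)
(
  pushd "${tmp_dir}" >/dev/null     # takes effect only inside the subshell
  touch downloaded_artifact         # stand-in for the real download/unpack work
)
pwd                                 # still the original working directory
rm -rf "${tmp_dir}"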


@ -1,46 +1,32 @@
#!/bin/bash
set -eux -o pipefail
# This is typically blank but for CUDA 10* it'll be set to 10
windows_version_qualifier=""
cuda_major_version=${CUDA_VERSION%.*}
case ${CUDA_VERSION} in
10.1)
archive_version="v7.6.4.38"
windows_version_qualifier="10"
;;
10.2)
archive_version="v7.6.5.32"
windows_version_qualifier="10"
;;
11.1)
archive_version="v8.0.5.39"
;;
11.3)
archive_version="v8.2.0.53"
;;
*)
echo "CUDA_VERSION: ${CUDA_VERSION} not supported yet"
if [[ "$cuda_major_version" == "10" ]]; then
cudnn_installer_name="cudnn-${CUDA_VERSION}-windows10-x64-v7.6.4.38"
elif [[ "$cuda_major_version" == "11" ]]; then
if [[ "${CUDA_VERSION}" == "11.1" ]]; then
cudnn_installer_name="cudnn-${CUDA_VERSION}-windows-x64-v8.0.5.39"
elif [[ "${CUDA_VERSION}" == "11.3" ]]; then
cudnn_installer_name="cudnn-${CUDA_VERSION}-windows-x64-v8.2.0.53"
else
echo "This should not happen! ABORT."
exit 1
;;
esac
fi
else
echo "CUDNN for CUDA_VERSION $CUDA_VERSION is not supported yet"
exit 1
fi
cudnn_installer_name="cudnn_installer.zip"
cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/cudnn-${CUDA_VERSION}-windows${windows_version_qualifier}-x64-${archive_version}.zip"
cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/${cudnn_installer_name}.zip"
cudnn_install_folder="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/"
if [[ -f "${cudnn_install_folder}/include/cudnn.h" ]]; then
echo "Existing cudnn installation found, skipping install..."
else
tmp_dir=$(mktemp -d)
(
pushd "${tmp_dir}"
curl --retry 3 -o "${cudnn_installer_name}" "$cudnn_installer_link"
7z x "${cudnn_installer_name}" -ocudnn
# Use '${var:?}/*' to avoid potentially expanding to '/*'
# Remove all of the directories before attempting to copy files
rm -rf "${cudnn_install_folder:?}/*"
cp -rf cudnn/cuda/* "${cudnn_install_folder}"
)
rm -rf "${tmp_dir}"
fi
curl --retry 3 -O "$cudnn_installer_link"
7z x "${cudnn_installer_name}.zip" -ocudnn
# shellcheck recommends using '${var:?}/*' to avoid potentially expanding to '/*'
# Remove all of the directories before attempting to copy files
rm -rf "${cudnn_install_folder:?}/*"
cp -rf cudnn/cuda/* "${cudnn_install_folder}"
rm -rf cudnn
rm -f "${cudnn_installer_name}.zip"


@ -15,15 +15,11 @@ pytorch_params: &pytorch_params
build_only:
type: string
default: ""
ci_master:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
DOCKER_IMAGE: << parameters.docker_image >>
USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
BUILD_ONLY: << parameters.build_only >>
CI_MASTER: << pipeline.parameters.run_master_build >>
resource_class: << parameters.resource_class >>
pytorch_android_params: &pytorch_android_params
@ -64,9 +60,6 @@ pytorch_ios_params: &pytorch_ios_params
lite_interpreter:
type: string
default: "1"
use_coreml:
type: string
default: "0"
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
IOS_ARCH: << parameters.ios_arch >>
@ -74,7 +67,6 @@ pytorch_ios_params: &pytorch_ios_params
SELECTED_OP_LIST: << parameters.op_list >>
USE_PYTORCH_METAL: << parameters.use_metal >>
BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
USE_COREML_DELEGATE: << parameters.use_coreml >>
pytorch_windows_params: &pytorch_windows_params
parameters:
@ -93,9 +85,6 @@ pytorch_windows_params: &pytorch_windows_params
python_version:
type: string
default: "3.8"
vs_version:
type: string
default: "16.8.6"
vc_version:
type: string
default: "14.16"
@ -113,7 +102,6 @@ pytorch_windows_params: &pytorch_windows_params
SCCACHE_BUCKET: "ossci-compiler-cache"
CUDA_VERSION: <<parameters.cuda_version>>
PYTHON_VERSION: <<parameters.python_version>>
VS_VERSION: <<parameters.vs_version>>
VC_VERSION: <<parameters.vc_version>>
VC_YEAR: <<parameters.vc_year>>
VC_PRODUCT: <<parameters.vc_product>>


@ -171,4 +171,4 @@ commands:
cd ~/project
export ANDROID_BUILD_TYPE="<< parameters.build_type >>"
export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -m tools.stats.upload_binary_size_to_scuba android
python3 .circleci/scripts/upload_binary_size_to_scuba.py android
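The two invocations above upload the same stats; the hunk swaps script-style execution for module-style execution. Roughly, and assuming the repository layout implied by the paths:

cd ~/project
# module-style: resolved via sys.path, so the tools package must be importable
# from the current directory
python3 -m tools.stats.upload_binary_size_to_scuba android
# script-style: runs the file directly, no package layout required
python3 .circleci/scripts/upload_binary_size_to_scuba.py android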


@ -29,7 +29,7 @@
cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -mpip install requests && \
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
- persist_to_workspace:
root: /
paths: final_pkgs
@ -239,7 +239,7 @@
binary_ios_build:
<<: *pytorch_ios_params
macos:
xcode: "12.5.1"
xcode: "12.0"
steps:
- attach_workspace:
at: ~/workspace
@ -266,7 +266,7 @@
binary_ios_upload:
<<: *pytorch_ios_params
macos:
xcode: "12.5.1"
xcode: "12.0"
steps:
- attach_workspace:
at: ~/workspace


@ -41,10 +41,9 @@
no_output_timeout: "1h"
command: |
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
# turn v1.12.0rc3 into 1.12.0
tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9.]*\).*/\1/')
tag=${CIRCLE_TAG:1:5}
target=${tag:-master}
echo "building for ${target}"
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
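The two tag lines are the substance of this hunk: the fixed-width slice only handles tags shaped exactly like v1.12.0, while the sed expression also strips release-candidate suffixes, as its comment says. A quick illustration:

CIRCLE_TAG="v1.12.0rc3"
echo "${CIRCLE_TAG:1:5}"                               # prints "1.12." (truncated)
echo "${CIRCLE_TAG}" | sed -e 's/v*\([0-9.]*\).*/\1/'  # prints "1.12.0"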
@ -87,10 +86,8 @@
no_output_timeout: "1h"
command: |
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
# turn v1.12.0rc3 into 1.12.0
tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9.]*\).*/\1/')
tag=${CIRCLE_TAG:1:5}
target=${tag:-master}
echo "building for ${target}"
@ -129,7 +126,6 @@
set -e
export IN_CI=1
export CROSS_COMPILE_ARM64=1
export JOB_BASE_NAME=$CIRCLE_JOB
# Install sccache
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
@ -166,7 +162,6 @@
command: |
set -e
export IN_CI=1
export JOB_BASE_NAME=$CIRCLE_JOB
# Install sccache
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
@ -203,7 +198,6 @@
command: |
set -e
export IN_CI=1
export JOB_BASE_NAME=$CIRCLE_JOB
chmod a+x .jenkins/pytorch/macos-test.sh
unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
@ -214,14 +208,12 @@
set -ex
source /Users/distiller/workspace/miniconda3/bin/activate
pip install boto3
export IN_CI=1
export JOB_BASE_NAME=$CIRCLE_JOB
export PYTHONPATH="$PWD"
# Using the same IAM user to write stats to our OSS bucket
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports
@ -243,7 +235,6 @@
set -e
export IN_CI=1
export BUILD_LITE_INTERPRETER=1
export JOB_BASE_NAME=$CIRCLE_JOB
chmod a+x ${HOME}/project/.jenkins/pytorch/macos-lite-interpreter-build-test.sh
unbuffer ${HOME}/project/.jenkins/pytorch/macos-lite-interpreter-build-test.sh 2>&1 | ts
- store_test_results:
@ -267,7 +258,7 @@
no_output_timeout: "1h"
command: |
set -eux
docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
docker_image_commit=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
docker_image_libtorch_android_x86_32=${docker_image_commit}-android-x86_32
docker_image_libtorch_android_x86_64=${docker_image_commit}-android-x86_64
@ -356,7 +347,7 @@
no_output_timeout: "1h"
command: |
set -eux
docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
docker_image_commit=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
docker_image_libtorch_android_x86_32_gradle=${docker_image_commit}-android-x86_32-gradle
@ -393,7 +384,7 @@
no_output_timeout: "1h"
command: |
set -e
docker_image_libtorch_android_x86_32=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}-android-x86_32
docker_image_libtorch_android_x86_32=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}-android-x86_32
echo "docker_image_libtorch_android_x86_32: "${docker_image_libtorch_android_x86_32}
# x86
@ -440,7 +431,7 @@
echo "DOCKER_IMAGE: ${DOCKER_IMAGE}:${DOCKER_TAG}"
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
git submodule sync && git submodule update -q --init --recursive --depth 1
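# (Illustrative aside, not part of the diff: per the submodule.fetchJobs
# documentation, "--jobs 0" asks git to pick a reasonable level of fetch
# parallelism, whereas omitting the flag falls back to the configured
# default of one fetch at a time.)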
VOLUME_MOUNTS="-v /home/circleci/project/:/var/lib/jenkins/workspace"
export id=$(docker run --env-file "${BASH_ENV}" ${VOLUME_MOUNTS} --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
@ -456,7 +447,7 @@
pytorch_ios_build:
<<: *pytorch_ios_params
macos:
xcode: "12.5.1"
xcode: "12.0"
steps:
- checkout
- run_brew_for_ios_build
@ -470,17 +461,16 @@
# install fastlane
sudo gem install bundler && bundle install
# install certificates
echo ${IOS_CERT_KEY_2022} >> cert.txt
echo ${IOS_CERT_KEY} >> cert.txt
base64 --decode cert.txt -o Certificates.p12
rm cert.txt
bundle exec fastlane install_root_cert
bundle exec fastlane install_dev_cert
bundle exec fastlane install_cert
# install the provisioning profile
PROFILE=PyTorch_CI_2022.mobileprovision
PROFILE=PyTorch_CI_2021.mobileprovision
PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
mkdir -pv "${PROVISIONING_PROFILES}"
cd "${PROVISIONING_PROFILES}"
echo ${IOS_SIGN_KEY_2022} >> cert.txt
echo ${IOS_SIGN_KEY} >> cert.txt
base64 --decode cert.txt -o ${PROFILE}
rm cert.txt
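# (Illustrative aside, not part of the diff: the round-trip above stores a
# binary secret as base64 text in a CI env var and decodes it back on the
# runner, e.g.
#   printf 'example-secret' | base64 > cert.txt     # what the env var holds
#   base64 --decode cert.txt > Certificates.p12     # restore the binary
# "-o" above is the BSD/macOS base64 output flag; GNU base64 writes to
# stdout instead.)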
- run:
@ -510,7 +500,7 @@
# sync submodules
cd ${PROJ_ROOT}
git submodule sync
git submodule update --init --recursive --depth 1 --jobs 0
git submodule update --init --recursive --depth 1
# export
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
@ -538,8 +528,12 @@
no_output_timeout: "30m"
command: |
set -e
if [ ${BUILD_LITE_INTERPRETER} == 0 ]; then
echo "Run Build Test is not for full jit, skipping."
exit 0
fi
PROJ_ROOT=/Users/distiller/project
PROFILE=PyTorch_CI_2022
PROFILE=PyTorch_CI_2021
# run the ruby build script
if ! [ -x "$(command -v xcodebuild)" ]; then
echo 'Error: xcodebuild is not installed.'
@ -563,28 +557,21 @@
if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
echo "not SIMULATOR build, skip it."
exit 0
elif [ ${BUILD_LITE_INTERPRETER} == 0 ]; then
echo "Run Simulator Tests is not for full jit, skipping."
exit 0
fi
WORKSPACE=/Users/distiller/workspace
PROJ_ROOT=/Users/distiller/project
source ~/anaconda/bin/activate
# use the pytorch nightly build to generate models
conda install pytorch torchvision -c pytorch-nightly --yes
# generate models for different backends
pip install torch torchvision --progress-bar off
#run unit test
cd ${PROJ_ROOT}/ios/TestApp/benchmark
mkdir -p ../models
python trace_model.py
if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
ruby setup.rb --lite 1
else
ruby setup.rb
fi
ruby setup.rb
cd ${PROJ_ROOT}/ios/TestApp
instruments -s -devices
if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
else
fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
fi
fastlane scan
pytorch_linux_bazel_build:
<<: *pytorch_params
machine:
@ -606,7 +593,7 @@
echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
git submodule sync && git submodule update -q --init --recursive --depth 1
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
@ -617,7 +604,7 @@
# Push intermediate Docker image for next phase to use
if [ -z "${BUILD_ONLY}" ]; then
# Augment our output image name with bazel to avoid collisions
output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=$output_image
docker commit "$id" ${COMMIT_DOCKER_IMAGE}
time docker push ${COMMIT_DOCKER_IMAGE}
@ -637,7 +624,7 @@
no_output_timeout: "90m"
command: |
set -e
output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=$output_image
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
@ -697,7 +684,7 @@
no_output_timeout: "30m"
command: |
set -ex
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})


@ -30,11 +30,11 @@ jobs:
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
git submodule sync && git submodule update -q --init --recursive --depth 1
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
export COMMAND='((echo "sudo chown -R jenkins workspace && export JOB_BASE_NAME="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
export COMMAND='((echo "sudo chown -R jenkins workspace && export CIRCLE_JOB="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
@ -47,7 +47,7 @@ jobs:
# The xla build uses the same docker image as
# pytorch_linux_bionic_py3_6_clang9_build. In the push step, we have to
# distinguish between them so the test can pick up the correct image.
output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
export COMMIT_DOCKER_IMAGE=$output_image-xla
elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
@ -81,7 +81,7 @@ jobs:
cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
python3 -mpip install requests && \
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
- store_artifacts:
path: /home/circleci/project/dist
@ -105,7 +105,7 @@ jobs:
export DOCKER_TAG="f3d89a32912f62815e4feaeed47e564e887dffd6"
fi
# See Note [Special build images]
output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
export COMMIT_DOCKER_IMAGE=$output_image-xla
elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
@ -158,14 +158,13 @@ jobs:
}
if is_vanilla_build; then
echo "apt-get update || apt-get install libgnutls30" | docker exec -u root -i "$id" bash
echo "apt-get install -y qemu-user gdb" | docker exec -u root -i "$id" bash
echo "apt-get update && apt-get install -y qemu-user gdb" | docker exec -u root -i "$id" bash
echo "cd workspace/build; qemu-x86_64 -g 2345 -cpu Broadwell -E ATEN_CPU_CAPABILITY=default ./bin/basic --gtest_filter=BasicTest.BasicTestCPU & gdb ./bin/basic -ex 'set pagination off' -ex 'target remote :2345' -ex 'continue' -ex 'bt' -ex='set confirm off' -ex 'quit \$_isvoid(\$_exitcode)'" | docker exec -u jenkins -i "$id" bash
else
echo "Skipping for ${BUILD_ENVIRONMENT}"
fi
- run:
name: Test
name: Run tests
no_output_timeout: "90m"
command: |
set -e
@ -174,16 +173,7 @@ jobs:
# =================== The following code will be executed inside Docker container ===================
set -ex
export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
export JOB_BASE_NAME="$CIRCLE_JOB"
# temporary fix for https://github.com/pytorch/pytorch/issues/60746
if [ -z "$CIRCLE_PR_NUMBER" ]; then
if [[ $CIRCLE_BRANCH =~ .*pull.* ]]; then
export PR_NUMBER="$(echo $CIRCLE_BRANCH | sed 's/[^0-9]//g')"
export CIRCLE_PR_NUMBER="$PR_NUMBER"
fi
else
export PR_NUMBER="$CIRCLE_PR_NUMBER"
fi
export CIRCLE_JOB="$CIRCLE_JOB"
${PARALLEL_FLAGS}
cd workspace
EOL
@ -230,10 +220,11 @@ jobs:
export CIRCLE_SHA1="$CIRCLE_SHA1"
export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
export CIRCLE_BRANCH="$CIRCLE_BRANCH"
export JOB_BASE_NAME="$CIRCLE_JOB"
export CIRCLE_JOB="$CIRCLE_JOB"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
cd workspace
python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
export PYTHONPATH="\${PWD}"
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
EOL
echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
unbuffer bash command.sh | ts
@ -263,9 +254,6 @@ jobs:
python_version:
type: string
default: "3.8"
vs_version:
type: string
default: "16.8.6"
vc_version:
type: string
default: "14.16"
@ -333,9 +321,6 @@ jobs:
python_version:
type: string
default: "3.8"
vs_version:
type: string
default: "16.8.6"
vc_version:
type: string
default: "14.16"
@ -391,8 +376,9 @@ jobs:
set -ex
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
export PYTHONPATH="$PWD"
pip install typing_extensions boto3
python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
when: always
- store_test_results:
path: test/test-reports


@ -1,3 +1,169 @@
scheduled-ci:
triggers:
- schedule:
# runs every 4 hours on the 45th minute
cron: "45 0,4,8,12,16,20 * * *"
filters:
branches:
only:
- master
jobs:
- docker_build_job:
name: "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
image_name: "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
- pytorch_linux_build:
name: periodic_pytorch_xenial_cuda11_3_cudnn8_gcc7_build
requires:
- "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
build_environment: "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
- pytorch_linux_test:
name: periodic_pytorch_xenial_cuda11_3_cudnn8_gcc7_test
requires:
- periodic_pytorch_xenial_cuda11_3_cudnn8_gcc7_build
build_environment: "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
- pytorch_linux_build:
name: periodic_libtorch_xenial_cuda11_3_cudnn8_gcc7_build
requires:
- "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
build_environment: "pytorch-libtorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
name: periodic_pytorch_windows_cuda11.3_build
python_version: "3.8"
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: periodic_pytorch_windows_cuda11.3_test1
python_version: "3.8"
requires:
- periodic_pytorch_windows_cuda11.3_build
test_name: pytorch-windows-test1
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: periodic_pytorch_windows_cuda11.3_test2
python_version: "3.8"
requires:
- periodic_pytorch_windows_cuda11.3_build
test_name: pytorch-windows-test2
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
# The following allows these jobs to run on ci-all and release branches
debuggable-scheduled-ci:
jobs:
- docker_build_job:
name: "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
image_name: "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
- pytorch_linux_build:
name: pytorch_linux_xenial_cuda11_3_cudnn8_py3_gcc7_build
requires:
- "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
build_environment: "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
- pytorch_linux_test:
name: pytorch_linux_xenial_cuda11_3_cudnn8_py3_gcc7_test
requires:
- pytorch_linux_xenial_cuda11_3_cudnn8_py3_gcc7_build
build_environment: "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-test"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
use_cuda_docker_runtime: "1"
resource_class: gpu.medium
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
- pytorch_linux_build:
name: pytorch_libtorch_linux_xenial_cuda11_3_cudnn8_py3_gcc7_build
requires:
- "docker-pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
build_environment: "pytorch-libtorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7-build"
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7"
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
name: pytorch_windows_vs2019_py38_cuda11.3_build
python_version: "3.8"
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: pytorch_windows_vs2019_py38_cuda11.3_test1
python_version: "3.8"
requires:
- pytorch_windows_vs2019_py38_cuda11.3_build
test_name: pytorch-windows-test1
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
- pytorch_windows_test:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11.3"
executor: windows-with-nvidia-gpu
name: pytorch_windows_vs2019_py38_cuda11.3_test2
python_version: "3.8"
requires:
- pytorch_windows_vs2019_py38_cuda11.3_build
test_name: pytorch-windows-test2
use_cuda: "1"
vc_product: BuildTools
vc_version: "14.28.29333"
vc_year: "2019"
filters:
branches:
only:
- /ci-all\/.*/
- /release\/.*/
# the following clones pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7's tests but enables
# slow tests and sets an environment variable so gradcheck runs with fast_mode=False
slow-gradcheck-scheduled-ci:


@ -9,7 +9,6 @@ bugprone-*,
-bugprone-reserved-identifier,
cppcoreguidelines-*,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-avoid-non-const-global-variables,
-cppcoreguidelines-interfaces-global-init,
-cppcoreguidelines-macro-usage,
-cppcoreguidelines-owning-memory,
@ -22,7 +21,6 @@ cppcoreguidelines-*,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-pro-type-vararg,
-cppcoreguidelines-special-member-functions,
-cppcoreguidelines-non-private-member-variables-in-classes,
-facebook-hte-RelativeInclude,
hicpp-exception-baseclass,
hicpp-avoid-goto,
@ -39,6 +37,5 @@ performance-*,
'
HeaderFilterRegex: 'torch/csrc/.*'
AnalyzeTemporaryDtors: false
WarningsAsErrors: '*'
CheckOptions:
...

.gitattributes vendored

@ -1,4 +1 @@
*.bat text eol=crlf
.circleci/config.yml linguist-generated=true
.github/workflows/generated-*.yml linguist-generated=true
.github/generated-* linguist-generated=true
*.bat text eol=crlf


@ -1,5 +1,5 @@
---
name: "\U0001F680 Feature Request"
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new PyTorch feature
---


@ -1,8 +0,0 @@
self-hosted-runner:
labels:
- linux.2xlarge
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu
- windows.4xlarge
- windows.8xlarge.nvidia.gpu
- bm-runner


@ -1,102 +0,0 @@
{
"__comment": "@generated DO NOT EDIT MANUALLY, Generation script: .github/scripts/generate_ci_workflows.py",
"label_rules": {
"ciflow/all": [
"libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
"libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
"linux-bionic-cuda10.2-py3.9-gcc7",
"linux-bionic-py3.6-clang9",
"linux-bionic-py3.8-gcc9-coverage",
"linux-xenial-cuda10.2-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7-bazel-test",
"parallelnative-linux-xenial-py3.6-gcc5.4",
"periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-win-vs2019-cuda11.1-py3",
"puretorch-linux-xenial-py3.6-gcc5.4",
"win-vs2019-cpu-py3",
"win-vs2019-cuda10.2-py3",
"win-vs2019-cuda11.3-py3"
],
"ciflow/bazel": [
"linux-xenial-py3.6-gcc7-bazel-test"
],
"ciflow/coverage": [
"linux-bionic-py3.8-gcc9-coverage"
],
"ciflow/cpu": [
"linux-bionic-py3.6-clang9",
"linux-bionic-py3.8-gcc9-coverage",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7-bazel-test",
"parallelnative-linux-xenial-py3.6-gcc5.4",
"puretorch-linux-xenial-py3.6-gcc5.4",
"win-vs2019-cpu-py3"
],
"ciflow/cuda": [
"libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
"libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
"linux-bionic-cuda10.2-py3.9-gcc7",
"linux-xenial-cuda10.2-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7",
"periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-win-vs2019-cuda11.1-py3",
"win-vs2019-cuda10.2-py3",
"win-vs2019-cuda11.3-py3"
],
"ciflow/default": [
"linux-bionic-py3.6-clang9",
"linux-bionic-py3.8-gcc9-coverage",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7-bazel-test",
"win-vs2019-cpu-py3",
"win-vs2019-cuda11.3-py3"
],
"ciflow/libtorch": [
"libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
"libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
"periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7"
],
"ciflow/linux": [
"libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
"libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
"linux-bionic-cuda10.2-py3.9-gcc7",
"linux-bionic-py3.6-clang9",
"linux-bionic-py3.8-gcc9-coverage",
"linux-xenial-cuda10.2-py3.6-gcc7",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-py3.6-gcc5.4",
"linux-xenial-py3.6-gcc7-bazel-test",
"parallelnative-linux-xenial-py3.6-gcc5.4",
"periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-linux-xenial-cuda11.1-py3.6-gcc7",
"puretorch-linux-xenial-py3.6-gcc5.4"
],
"ciflow/noarch": [
"linux-bionic-py3.6-clang9"
],
"ciflow/scheduled": [
"periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-linux-xenial-cuda11.1-py3.6-gcc7",
"periodic-win-vs2019-cuda11.1-py3"
],
"ciflow/slow": [
"linux-bionic-cuda10.2-py3.9-gcc7",
"linux-xenial-cuda10.2-py3.6-gcc7"
],
"ciflow/win": [
"periodic-win-vs2019-cuda11.1-py3",
"win-vs2019-cpu-py3",
"win-vs2019-cuda10.2-py3",
"win-vs2019-cuda11.3-py3"
],
"ciflow/xla": [
"linux-bionic-py3.6-clang9"
]
},
"version": "v1"
}


@ -13,9 +13,5 @@ labels_to_circle_params:
- run_build
ci/master:
parameter: run_master_build
set_to_false:
- run_build
ci/slow-gradcheck:
parameter: run_slow_gradcheck_build
set_to_false:
- run_build


@ -1,2 +1 @@
tracking_issue: 24422
ciflow_tracking_issue: 64124


@ -1,6 +0,0 @@
#!/bin/bash -e
# Allows this script to be invoked from any directory:
cd "$(dirname "$0")"
python3 scripts/generate_ci_workflows.py


@ -27,11 +27,6 @@ runner_types:
os: linux
max_available: 50
disk_size: 150
linux.16xlarge.nvidia.gpu:
instance_type: g3.16xlarge
os: linux
max_available: 10
disk_size: 150
windows.4xlarge:
instance_type: c5d.4xlarge
os: windows


@ -13,10 +13,7 @@ WORKFLOWS = REPO_ROOT / ".github" / "workflows"
def concurrency_key(filename: Path) -> str:
workflow_name = filename.with_suffix("").name.replace("_", "-")
if workflow_name.startswith("generated-"):
workflow_name = workflow_name[len("generated-"):]
return f"{workflow_name}-${{{{ github.event.pull_request.number || github.sha }}}}" \
"-${{ github.event_name == 'workflow_dispatch' }}"
return f"{workflow_name}-${{{{ github.event.pull_request.number || github.sha }}}}"
def should_check(filename: Path) -> bool:


@ -1,586 +1,222 @@
#!/usr/bin/env python3
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Dict, Set
from typing import Any, Dict
import jinja2
import json
import os
import sys
from typing_extensions import Literal
YamlShellBool = Literal["''", 1]
Arch = Literal["windows", "linux"]
DOCKER_REGISTRY = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
GITHUB_DIR = Path(__file__).resolve().parent.parent
GITHUB_DIR = Path(__file__).parent.parent
# it would be nice to statically specify that build_environment must be
# present, but currently Python has no easy way to do that
# https://github.com/python/mypy/issues/4617
PyTorchWorkflow = Dict[str, Any]
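# Illustrative aside, not part of the diff: a TypedDict can make keys required,
# e.g.
#
#     class _Workflow(TypedDict):
#         build_environment: str
#
# but that forces every key to be required (or, with total=False, none);
# mixing one mandatory key with many optional ones had no clean spelling at
# the time, hence the loose Dict[str, Any] alias.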
WINDOWS_CPU_TEST_RUNNER = "windows.4xlarge"
WINDOWS_CUDA_TEST_RUNNER = "windows.8xlarge.nvidia.gpu"
WINDOWS_RUNNERS = {
WINDOWS_CPU_TEST_RUNNER,
WINDOWS_CUDA_TEST_RUNNER,
}
def PyTorchWindowsWorkflow(
*,
build_environment: str,
test_runner_type: str,
cuda_version: str,
on_pull_request: bool = False
) -> PyTorchWorkflow:
return {
"build_environment": build_environment,
"test_runner_type": test_runner_type,
"cuda_version": cuda_version,
"on_pull_request": on_pull_request,
}
LINUX_CPU_TEST_RUNNER = "linux.2xlarge"
LINUX_CUDA_TEST_RUNNER = "linux.8xlarge.nvidia.gpu"
LINUX_RUNNERS = {
LINUX_CPU_TEST_RUNNER,
LINUX_CUDA_TEST_RUNNER,
}
CUDA_RUNNERS = {
WINDOWS_CUDA_TEST_RUNNER,
LINUX_CUDA_TEST_RUNNER,
}
CPU_RUNNERS = {
WINDOWS_CPU_TEST_RUNNER,
LINUX_CPU_TEST_RUNNER,
}
LABEL_CIFLOW_ALL = "ciflow/all"
LABEL_CIFLOW_BAZEL = "ciflow/bazel"
LABEL_CIFLOW_COVERAGE = "ciflow/coverage"
LABEL_CIFLOW_CPU = "ciflow/cpu"
LABEL_CIFLOW_CUDA = "ciflow/cuda"
LABEL_CIFLOW_DEFAULT = "ciflow/default"
LABEL_CIFLOW_LIBTORCH = "ciflow/libtorch"
LABEL_CIFLOW_LINUX = "ciflow/linux"
LABEL_CIFLOW_SCHEDULED = "ciflow/scheduled"
LABEL_CIFLOW_SLOW = "ciflow/slow"
LABEL_CIFLOW_WIN = "ciflow/win"
LABEL_CIFLOW_XLA = "ciflow/xla"
LABEL_CIFLOW_NOARCH = "ciflow/noarch"
@dataclass
class CIFlowConfig:
enabled: bool = False
# For use to enable workflows to run on pytorch/pytorch-canary
run_on_canary: bool = False
labels: Set[str] = field(default_factory=set)
trigger_action: str = 'unassigned'
trigger_actor: str = 'pytorchbot'
root_job_name: str = 'ciflow_should_run'
root_job_condition: str = ''
# trigger_action_only controls whether we listen only on the trigger_action of a pull_request.
# If it's False, we listen on all default pull_request actions; this is useful when
# ciflow (via probot) is not automated yet.
trigger_action_only: bool = False
def gen_root_job_condition(self) -> None:
# TODO: Make conditions strict
# At the beginning of the rollout of ciflow, we keep everything the same as what we have
# Once it is fully rolled out, we can enforce strict constraints
# e.g. ADD env.GITHUB_ACTOR == '{self.trigger_actor}
# REMOVE github.event.action !='{self.trigger_action}'
label_conditions = [
f"contains(github.event.pull_request.labels.*.name, '{label}')" for label in sorted(self.labels)]
if self.run_on_canary:
self.root_job_condition = "(github.repository_owner == 'pytorch') && "
else:
self.root_job_condition = "(github.repository == 'pytorch/pytorch') && "
self.root_job_condition += f"((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || " \
f"(github.event.action !='{self.trigger_action}') || " \
f"({' || '.join(label_conditions)}))"
def reset_root_job(self) -> None:
self.root_job_name = ''
self.root_job_condition = ''
def __post_init__(self) -> None:
if not self.enabled:
self.reset_root_job()
return
self.labels.add(LABEL_CIFLOW_ALL)
self.gen_root_job_condition()
def PyTorchLinuxWorkflow(
*,
build_environment: str,
docker_image_base: str,
test_runner_type: str,
on_pull_request: bool = False,
enable_doc_jobs: bool = False,
) -> PyTorchWorkflow:
return {
"build_environment": build_environment,
"docker_image_base": docker_image_base,
"test_runner_type": test_runner_type,
"on_pull_request": on_pull_request,
"enable_doc_jobs": enable_doc_jobs,
}
@dataclass
class CIFlowRuleset:
version = 'v1'
output_file = f'{GITHUB_DIR}/generated-ciflow-ruleset.json'
label_rules: Dict[str, Set[str]] = field(default_factory=dict)
def add_label_rule(self, labels: Set[str], workflow_name: str) -> None:
for label in labels:
if label in self.label_rules:
self.label_rules[label].add(workflow_name)
else:
self.label_rules[label] = {workflow_name}
def generate_json(self) -> None:
GENERATED = "generated"  # Note: please keep the variable GENERATED, otherwise Phabricator will hide the whole file
output = {
"__comment": f"@{GENERATED} DO NOT EDIT MANUALLY, Generation script: .github/scripts/generate_ci_workflows.py",
"version": self.version,
"label_rules": {
label: sorted(list(workflows))
for label, workflows in self.label_rules.items()
}
}
with open(self.output_file, 'w') as outfile:
json.dump(output, outfile, indent=2, sort_keys=True)
outfile.write('\n')
@dataclass
class CIWorkflow:
# Required fields
arch: Arch
build_environment: str
test_runner_type: str
# Optional fields
ciflow_config: CIFlowConfig = field(default_factory=CIFlowConfig)
cuda_version: str = ''
docker_image_base: str = ''
enable_doc_jobs: bool = False
exclude_test: bool = False
is_coverage: bool = False
is_libtorch: bool = False
is_scheduled: str = ''
num_test_shards: int = 1
on_pull_request: bool = False
only_build_on_pull_request: bool = False
only_run_smoke_tests_on_pull_request: bool = False
num_test_shards_on_pull_request: int = -1
distributed_test: bool = True
# The following variables will be set as environment variables,
# so it's easier for both shell and Python scripts to consume them when false is represented as the empty string.
enable_jit_legacy_test: YamlShellBool = "''"
enable_distributed_test: YamlShellBool = "''"
enable_multigpu_test: YamlShellBool = "''"
enable_nogpu_no_avx_test: YamlShellBool = "''"
enable_nogpu_no_avx2_test: YamlShellBool = "''"
enable_slow_test: YamlShellBool = "''"
enable_docs_test: YamlShellBool = "''"
enable_backwards_compat_test: YamlShellBool = "''"
enable_xla_test: YamlShellBool = "''"
enable_noarch_test: YamlShellBool = "''"
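# Illustrative aside, not part of the diff: with this encoding, a generated
# shell step can gate on a flag with a plain emptiness test, e.g.
#
#     if [[ -n "${ENABLE_DISTRIBUTED_TEST}" ]]; then run_distributed_tests; fi
#
# since "''" renders as an empty environment variable and 1 as a non-empty
# one (run_distributed_tests is a hypothetical helper).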
def __post_init__(self) -> None:
if self.is_libtorch:
self.exclude_test = True
if not self.on_pull_request:
self.only_build_on_pull_request = False
if self.distributed_test:
self.enable_distributed_test = 1
# If num_test_shards_on_pull_request is not user-defined, default to num_test_shards unless we are
# only running smoke tests on the pull request.
if self.num_test_shards_on_pull_request == -1:
# Don't waste resources on runner spinup and cooldown for another shard if we are only running a few tests
if self.only_run_smoke_tests_on_pull_request:
self.num_test_shards_on_pull_request = 1
else:
self.num_test_shards_on_pull_request = self.num_test_shards
self.assert_valid()
def assert_valid(self) -> None:
err_message = f"invalid test_runner_type for {self.arch}: {self.test_runner_type}"
if self.arch == 'linux':
assert self.test_runner_type in LINUX_RUNNERS, err_message
if self.arch == 'windows':
assert self.test_runner_type in WINDOWS_RUNNERS, err_message
if self.ciflow_config.enabled:
# make sure if LABEL_CIFLOW_DEFAULT is set, we then need to set trigger_action_only to False
assert self.ciflow_config.trigger_action_only != (LABEL_CIFLOW_DEFAULT in self.ciflow_config.labels)
assert self.on_pull_request
assert LABEL_CIFLOW_ALL in self.ciflow_config.labels
assert LABEL_CIFLOW_ALL in self.ciflow_config.root_job_condition
if self.arch == 'linux':
assert LABEL_CIFLOW_LINUX in self.ciflow_config.labels
if self.arch == 'windows':
assert LABEL_CIFLOW_WIN in self.ciflow_config.labels
if self.test_runner_type in CUDA_RUNNERS:
assert LABEL_CIFLOW_CUDA in self.ciflow_config.labels
if self.test_runner_type in CPU_RUNNERS:
assert LABEL_CIFLOW_CPU in self.ciflow_config.labels
def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
output_file_path = GITHUB_DIR / f"workflows/generated-{self.build_environment}.yml"
with open(output_file_path, "w") as output_file:
GENERATED = "generated"  # Note: please keep the variable GENERATED, otherwise Phabricator will hide the whole file
output_file.writelines([f"# @{GENERATED} DO NOT EDIT MANUALLY\n"])
try:
content = workflow_template.render(asdict(self))
except Exception as e:
print(f"Failed on template: {workflow_template}", file=sys.stderr)
raise e
output_file.write(content)
if content[-1] != "\n":
output_file.write("\n")
print(output_file_path)
def generate_workflow_file(
*,
workflow: PyTorchWorkflow,
workflow_template: jinja2.Template,
) -> Path:
output_file_path = GITHUB_DIR / f"workflows/{workflow['build_environment']}.yml"
with open(output_file_path, "w") as output_file:
GENERATED = "generated"
output_file.writelines([f"# @{GENERATED} DO NOT EDIT MANUALLY\n"])
output_file.write(workflow_template.render(**workflow))
output_file.write("\n")
return output_file_path
WINDOWS_WORKFLOWS = [
CIWorkflow(
arch="windows",
build_environment="win-vs2019-cpu-py3",
PyTorchWindowsWorkflow(
build_environment="pytorch-win-vs2019-cpu-py3",
cuda_version="cpu",
test_runner_type=WINDOWS_CPU_TEST_RUNNER,
on_pull_request=True,
num_test_shards=2,
ciflow_config=CIFlowConfig(
enabled=True,
run_on_canary=True,
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_CPU, LABEL_CIFLOW_WIN}
),
on_pull_request=True
),
CIWorkflow(
arch="windows",
build_environment="win-vs2019-cuda10.2-py3",
cuda_version="10.2",
PyTorchWindowsWorkflow(
build_environment="pytorch-win-vs2019-cuda10-cudnn7-py3",
cuda_version="10.1",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
on_pull_request=True,
num_test_shards=2,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_CUDA, LABEL_CIFLOW_WIN}
),
),
CIWorkflow(
arch="windows",
build_environment="win-vs2019-cuda11.3-py3",
cuda_version="11.3",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
num_test_shards=2,
on_pull_request=True,
only_run_smoke_tests_on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
run_on_canary=True,
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_CUDA, LABEL_CIFLOW_WIN}
),
),
CIWorkflow(
arch="windows",
build_environment="periodic-win-vs2019-cuda11.1-py3",
PyTorchWindowsWorkflow(
build_environment="pytorch-win-vs2019-cuda11-cudnn8-py3",
cuda_version="11.1",
test_runner_type=WINDOWS_CUDA_TEST_RUNNER,
num_test_shards=2,
is_scheduled="45 0,4,8,12,16,20 * * *",
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_WIN, LABEL_CIFLOW_CUDA}
),
),
)
]
LINUX_WORKFLOWS = [
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3.6-gcc5.4",
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-xenial-py3.6-gcc5.4",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
test_runner_type=LINUX_CPU_TEST_RUNNER,
on_pull_request=True,
enable_jit_legacy_test=1,
enable_doc_jobs=True,
enable_docs_test=1,
enable_backwards_compat_test=1,
num_test_shards=2,
ciflow_config=CIFlowConfig(
enabled=True,
run_on_canary=True,
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU}
),
),
# ParallelTBB does not have a maintainer and is currently flaky
# CIWorkflow(
# arch="linux",
# build_environment="paralleltbb-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# # This is a master-only job even though on_pull_request is set to True
# on_pull_request=True,
# ciflow_config=CIFlowConfig(
# enabled=True,
# trigger_action_only=True,
# labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU},
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-paralleltbb-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
CIWorkflow(
arch="linux",
build_environment="parallelnative-linux-xenial-py3.6-gcc5.4",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
test_runner_type=LINUX_CPU_TEST_RUNNER,
# This is a master-only job even though on_pull_request is set to True
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU},
),
),
# Build PyTorch with BUILD_CAFFE2=OFF
CIWorkflow(
arch="linux",
build_environment="puretorch-linux-xenial-py3.6-gcc5.4",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
test_runner_type=LINUX_CPU_TEST_RUNNER,
exclude_test=True,
# This is a master-only job even though on_pull_request is set to True
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU},
),
),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-gcc7",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-parallelnative-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-pure_torch-linux-xenial-py3.6-gcc5.4",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc5.4",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-asan",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-asan",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang7-onnx",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang7-onnx",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
CIWorkflow(
arch="linux",
build_environment="linux-bionic-cuda10.2-py3.9-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
num_test_shards=2,
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
run_on_canary=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_SLOW, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA}
),
),
CIWorkflow(
arch="linux",
build_environment="linux-xenial-cuda10.2-py3.6-gcc7",
PyTorchLinuxWorkflow(
build_environment="pytorch-linux-xenial-cuda10.2-cudnn7-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
enable_jit_legacy_test=1,
enable_multigpu_test=1,
enable_nogpu_no_avx_test=1,
enable_nogpu_no_avx2_test=1,
enable_slow_test=1,
num_test_shards=2,
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels=set([LABEL_CIFLOW_SLOW, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA]),
),
),
CIWorkflow(
arch="linux",
build_environment="libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
is_libtorch=True,
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels=set([LABEL_CIFLOW_LIBTORCH, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA]),
),
),
CIWorkflow(
arch="linux",
build_environment="linux-xenial-cuda11.3-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
num_test_shards=2,
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
labels=set([LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA]),
),
),
CIWorkflow(
arch="linux",
build_environment="libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
is_libtorch=True,
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels=set([LABEL_CIFLOW_LIBTORCH, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA]),
),
),
CIWorkflow(
arch="linux",
build_environment="periodic-linux-xenial-cuda11.1-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
num_test_shards=2,
is_scheduled="45 0,4,8,12,16,20 * * *",
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CUDA}
),
),
CIWorkflow(
arch="linux",
build_environment="periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
test_runner_type=LINUX_CUDA_TEST_RUNNER,
is_libtorch=True,
is_scheduled="45 0,4,8,12,16,20 * * *",
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
trigger_action_only=True,
labels={LABEL_CIFLOW_SCHEDULED, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_LIBTORCH, LABEL_CIFLOW_CUDA},
),
),
CIWorkflow(
arch="linux",
build_environment="linux-bionic-py3.8-gcc9-coverage",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
test_runner_type=LINUX_CPU_TEST_RUNNER,
on_pull_request=True,
is_coverage=True,
num_test_shards=2,
ciflow_config=CIFlowConfig(
enabled=True,
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_COVERAGE, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU},
),
),
CIWorkflow(
arch="linux",
build_environment="linux-bionic-py3.6-clang9",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
test_runner_type=LINUX_CPU_TEST_RUNNER,
on_pull_request=True,
num_test_shards=2,
distributed_test=False,
enable_noarch_test=1,
ciflow_config=CIFlowConfig(
enabled=True,
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_CPU, LABEL_CIFLOW_XLA, LABEL_CIFLOW_NOARCH},
),
),
# CIWorkflow(
# arch="linux",
# build_environment="linux-bionic-rocm3.9-py3.6",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# test_runner_type=LINUX_CUDA_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-libtorch-linux-xenial-cuda11.1-cudnn8-py3.6-gcc7",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
# test_runner_type=LINUX_CUDA_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.6-clang9-noarch",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-xla-linux-bionic-py3.6-clang9",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-vulkan-linux-bionic-py3.6-clang9",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.6-clang9",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-py3.8-gcc9-coverage",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-py3.8-gcc9",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-bionic-rocm3.9-py3.6",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-rocm3.9-py3.6",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-android-ndk-r19c-x86_32",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-android-ndk-r19c-x86_32",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-android-ndk-r19c-x86_64",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-android-ndk-r19c-x86_64",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-android-ndk-r19c-arm-v7a",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-android-ndk-r19c-arm-v7a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-android-ndk-r19c-arm-v8a",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-android-ndk-r19c-arm-v8a",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-mobile",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-mobile",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-asan",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-mobile-custom-dynamic",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-mobile-custom-dynamic",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-mobile-custom-static",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-mobile-custom-static",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
# CIWorkflow(
# arch="linux",
# build_environment="linux-xenial-py3.6-clang5-mobile-code-analysis",
# PyTorchLinuxWorkflow(
# build_environment="pytorch-linux-xenial-py3.6-clang5-mobile-code-analysis",
# docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
# test_runner_type=LINUX_CPU_TEST_RUNNER,
# ),
]
BAZEL_WORKFLOWS = [
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3.6-gcc7-bazel-test",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
test_runner_type=LINUX_CPU_TEST_RUNNER,
on_pull_request=True,
ciflow_config=CIFlowConfig(
enabled=True,
labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_BAZEL, LABEL_CIFLOW_CPU, LABEL_CIFLOW_LINUX},
),
),
]
if __name__ == "__main__":
jinja_env = jinja2.Environment(
variable_start_string="!{{",
loader=jinja2.FileSystemLoader(str(GITHUB_DIR.joinpath("templates"))),
undefined=jinja2.StrictUndefined,
)
template_and_workflows = [
(jinja_env.get_template("linux_ci_workflow.yml.j2"), LINUX_WORKFLOWS),
(jinja_env.get_template("windows_ci_workflow.yml.j2"), WINDOWS_WORKFLOWS),
(jinja_env.get_template("bazel_ci_workflow.yml.j2"), BAZEL_WORKFLOWS),
]
# Delete the existing generated files first; this should align with the .gitattributes file description.
existing_workflows = GITHUB_DIR.glob("workflows/generated-*")
for w in existing_workflows:
try:
os.remove(w)
except Exception as e:
print(f"Error occurred when deleting file {w}: {e}")
ciflow_ruleset = CIFlowRuleset()
for template, workflows in template_and_workflows:
for workflow in workflows:
workflow.generate_workflow_file(workflow_template=template)
if workflow.ciflow_config.enabled:
ciflow_ruleset.add_label_rule(workflow.ciflow_config.labels, workflow.build_environment)
elif workflow.on_pull_request:
# If ciflow is disabled but the workflow still runs on_pull_request, record it
# under the special LABEL_CIFLOW_DEFAULT label in the ruleset, which is later
# turned into an actual LABEL_CIFLOW_DEFAULT label on the workflow.
# During the rollout phase this has the same effect as LABEL_CIFLOW_DEFAULT.
ciflow_ruleset.add_label_rule({LABEL_CIFLOW_DEFAULT}, workflow.build_environment)
ciflow_ruleset.generate_json()
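For intuition, the ruleset generated here maps each ciflow label to the build environments it triggers. A hypothetical excerpt of the emitted JSON, assuming LABEL_CIFLOW_BAZEL is "ciflow/bazel" and LABEL_CIFLOW_DEFAULT is "ciflow/default" (the actual label strings are not shown in this diff):

{
    "ciflow/bazel": ["linux-xenial-py3.6-gcc7-bazel-test"],
    "ciflow/default": ["linux-xenial-py3.6-gcc7-bazel-test"]
}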

View File

@ -1,94 +0,0 @@
#!/usr/bin/env python3
"""Generates a matrix to be utilized through github actions
Will output a matrix to represent our testing configurations, which is currently
dictated by just sharding.
"""
import json
import os
import re
from typing import Dict
from typing_extensions import TypedDict
class Config(TypedDict):
num_shards: int
runner: str
def get_disabled_issues() -> str:
pr_body = os.getenv('PR_BODY', '')
# The below regex is meant to match all *case-insensitive* keywords that
# GitHub has delineated would link PRs to issues, more details here:
# https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue.
# E.g., "Close #62851", "fixES #62851" and "RESOLVED #62851" would all match, but not
# "closes #62851" --> extra space, "fixing #62851" --> not a keyword, nor "fix 62851" --> no #
regex = '(?i)(Close(d|s)?|Resolve(d|s)?|Fix(ed|es)?) #([0-9]+)'
issue_numbers = [x[4] for x in re.findall(regex, pr_body)]
return ','.join(issue_numbers)
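A quick illustration of get_disabled_issues() against a hypothetical PR body (not part of the original script):

import os

os.environ['PR_BODY'] = 'This PR fixes #62851 and Resolves #62852.'
print(get_disabled_issues())  # -> '62851,62852'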
def main() -> None:
TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
assert TEST_RUNNER_TYPE is not None
ON_PULL_REQUEST = os.getenv('GITHUB_HEAD_REF')
NUM_TEST_SHARDS_ON_PULL_REQUEST = os.getenv('NUM_TEST_SHARDS_ON_PULL_REQUEST')
NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
if ON_PULL_REQUEST and NUM_TEST_SHARDS_ON_PULL_REQUEST:
NUM_TEST_SHARDS = int(NUM_TEST_SHARDS_ON_PULL_REQUEST)
MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
NOGPU_RUNNER_TYPE = os.getenv('NOGPU_RUNNER_TYPE')
configs: Dict[str, Config] = {}
if os.getenv('ENABLE_JIT_LEGACY_TEST'):
configs['jit_legacy'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
if NOGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
if NOGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
if os.getenv('ENABLE_DISTRIBUTED_TEST'):
configs['distributed'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
if os.getenv('ENABLE_SLOW_TEST'):
configs['slow'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
if os.getenv('ENABLE_DOCS_TEST'):
configs['docs_test'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
if os.getenv('ENABLE_BACKWARDS_COMPAT_TEST'):
configs['backwards_compat'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
if os.getenv('ENABLE_XLA_TEST'):
configs['xla'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
if os.getenv('ENABLE_NOARCH_TEST'):
configs['noarch'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
matrix = {
'include': [
{
'config': 'default',
'shard': shard,
'num_shards': NUM_TEST_SHARDS,
'runner': TEST_RUNNER_TYPE,
}
for shard in range(1, NUM_TEST_SHARDS + 1)
] + [
{
'config': name,
'shard': shard,
'num_shards': config['num_shards'],
'runner': config['runner'],
}
for name, config in configs.items()
for shard in range(1, config['num_shards'] + 1)
]
}
render_matrix = {'config': list(dict.fromkeys(x['config'] for x in matrix['include']))}
print(json.dumps({'matrix': matrix, 'render-matrix': render_matrix}, indent=2))
print(f'::set-output name=matrix::{json.dumps(matrix)}')
print(f'::set-output name=render-matrix::{json.dumps(render_matrix)}')
print(f'::set-output name=ignore-disabled-issues::{get_disabled_issues()}')
if __name__ == "__main__":
main()
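For reference, with TEST_RUNNER_TYPE=linux.2xlarge, NUM_TEST_SHARDS=2 and ENABLE_DISTRIBUTED_TEST set, the script would emit a matrix of roughly this shape (a sketch, not captured CI output):

{
    'include': [
        {'config': 'default', 'shard': 1, 'num_shards': 2, 'runner': 'linux.2xlarge'},
        {'config': 'default', 'shard': 2, 'num_shards': 2, 'runner': 'linux.2xlarge'},
        {'config': 'distributed', 'shard': 1, 'num_shards': 1, 'runner': 'linux.2xlarge'},
    ]
}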

View File

@ -65,8 +65,6 @@ class PytorchVersion:
self.no_build_suffix = no_build_suffix
def get_post_build_suffix(self) -> str:
if self.no_build_suffix:
return ""
if self.gpu_arch_type == "cuda":
return f"+cu{self.gpu_arch_version.replace('.', '')}"
return f"+{self.gpu_arch_type}{self.gpu_arch_version}"
@ -89,9 +87,9 @@ def main() -> None:
)
parser.add_argument(
"--no-build-suffix",
action="store_true",
type=strtobool,
help="Whether or not to add a build suffix typically (+cpu)",
default=strtobool(os.environ.get("NO_BUILD_SUFFIX", "False"))
default=os.environ.get("NO_BUILD_SUFFIX", False)
)
parser.add_argument(
"--gpu-arch-type",

View File

@ -1,11 +0,0 @@
function Get-SSH-Sessions {
# Gets the process IDs of sshd sessions for all users other than SYSTEM
Get-Process sshd -IncludeUserName |
Where-Object UserName -notLike "*SYSTEM*" |
Select-Object Id
}
$runningSessions = Get-SSH-Sessions
foreach ($session in $runningSessions) {
Stop-Process -id $session.Id
}

View File

@ -13,7 +13,6 @@ Testing environment:
# 1. Does not reuse the build artifact in other CI workflows
# 2. CI jobs are serialized because there is only one worker
import os
import git # type: ignore[import]
import pathlib
import argparse
import subprocess
@ -24,7 +23,6 @@ CUDA_VERSION = "cu102"
PYTHON_VERSION = "3.7"
TORCHBENCH_CONFIG_NAME = "config.yaml"
MAGIC_PREFIX = "RUN_TORCHBENCH:"
MAGIC_TORCHBENCH_PREFIX = "TORCHBENCH_BRANCH:"
ABTEST_CONFIG_TEMPLATE = """# This config is automatically generated by run_torchbench.py
start: {control}
end: {treatment}
@ -59,7 +57,7 @@ def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> List[str]:
lines = map(lambda x: x.strip(), pf.read().splitlines())
magic_lines = list(filter(lambda x: x.startswith(MAGIC_PREFIX), lines))
if magic_lines:
# Only the first magic line will be recognized.
# Only the first magic line will be respected.
model_list = list(map(lambda x: x.strip(), magic_lines[0][len(MAGIC_PREFIX):].split(",")))
# Shortcut: if model_list is ["ALL"], run all the tests
if model_list == ["ALL"]:
@ -73,26 +71,6 @@ def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> List[str]:
return []
return model_list
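As an example, a PR body containing the (hypothetical) line "RUN_TORCHBENCH: resnet18, mobilenet_v2" makes extract_models_from_pr return ['resnet18', 'mobilenet_v2'], while "RUN_TORCHBENCH: ALL" short-circuits to running every model.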
def identify_torchbench_branch(torchbench_path: str, prbody_file: str) -> None:
branch_name: str
with open(prbody_file, "r") as pf:
lines = map(lambda x: x.strip(), pf.read().splitlines())
magic_lines = list(filter(lambda x: x.startswith(MAGIC_TORCHBENCH_PREFIX), lines))
if magic_lines:
# Only the first magic line will be recognized.
branch_name = magic_lines[0][len(MAGIC_TORCHBENCH_PREFIX):].strip()
# If not specified, directly return without the branch checkout
if not branch_name:
return
try:
print(f"Checking out the TorchBench branch: {branch_name} ...")
repo = git.Repo(torchbench_path)
origin = repo.remotes.origin
origin.fetch(branch_name)
repo.create_head(branch_name, origin.refs[branch_name]).checkout()
except git.exc.GitCommandError:
raise RuntimeError(f'{branch_name} doesn\'t exist in the pytorch/benchmark repository. Please double check.')
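Similarly, this removed helper recognized a (hypothetical) PR body line such as "TORCHBENCH_BRANCH: my-experiment-branch" and fetched and checked out that branch of pytorch/benchmark before running.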
def run_torchbench(pytorch_path: str, torchbench_path: str, output_dir: str) -> None:
# Copy system environment so that we will not override
env = dict(os.environ)
@ -118,12 +96,6 @@ if __name__ == "__main__":
if not models:
print("Can't parse the model filter from the pr body. Currently we only support allow-list.")
exit(1)
# Identify the specified TorchBench branch, verify the branch exists, and checkout the branch
try:
identify_torchbench_branch(args.torchbench_path, args.pr_body)
except RuntimeError as e:
print(f"Identify TorchBench branch failed: {str(e)}")
exit(1)
print(f"Ready to run TorchBench with benchmark. Result will be saved in the directory: {output_dir}.")
# Run TorchBench with the generated config
torchbench_config = gen_abtest_config(args.pr_base_sha, args.pr_head_sha, models)

View File

@ -1,17 +0,0 @@
function Get-SSH-Users {
# Gets ssh sessions for all users not named SYSTEM
Get-CimInstance -ClassName Win32_Process -Filter "Name = 'sshd.exe'" |
Get-CimAssociatedInstance -Association Win32_SessionProcess |
Get-CimAssociatedInstance -Association Win32_LoggedOnUser |
Where-Object {$_.Name -ne 'SYSTEM'} |
Measure-Object
}
$usersLoggedOn = Get-SSH-Users
Write-Output "Holding runner until all ssh sessions have logged out"
while ($usersLoggedOn.Count -gt 0) {
$usersLoggedOn = Get-SSH-Users
Write-Output "."
Start-Sleep -s 5
}

View File

@ -1,13 +0,0 @@
#!/usr/bin/env bash
set -eou pipefail
echo "Holding runner for 2 hours until all ssh sessions have logged out"
# 1440 iterations x 5-second sleep = 7200 s = 2 hours
for _ in $(seq 1440); do
# Break if no ssh session exists anymore
if [ "$(who)" = "" ]; then
break
fi
echo "."
sleep 5
done

View File

@ -1,137 +0,0 @@
{%- extends "linux_ci_workflow.yml.j2" -%}
{%- set exclude_test = true -%}
{% block name -%}
# Template is at: .github/templates/bazel_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: !{{ build_environment }}
{%- endblock %}
on:
{%- if on_pull_request %}
pull_request:
{%- if ciflow_config.enabled %}
{%- if ciflow_config.trigger_action_only %}
types: [!{{ ciflow_config.trigger_action }}]
{%- else %}
types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
{%- endif %}
{%- endif %}
{%- else %}
# TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
{%- endif %}
{% block build +%}
# building and testing in a single job since bazel runs only a small subset of tests
build-and-test:
runs-on: !{{ test_runner_type }}
needs: [calculate-docker-image, !{{ ciflow_config.root_job_name }}]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: !{{ build_environment }}-build-and-test
NUM_TEST_SHARDS: !{{ num_test_shards }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
!{{ common.setup_ec2_linux() }}
!{{ common.checkout_pytorch("recursive") }}
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- name: Output disk space left
run: |
sudo df -H
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e PR_LABELS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
!{{ common.parse_ref() }}
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Test
run: |
# detached container should get cleaned up by teardown_ec2_linux
export SHARD_NUMBER=0
# TODO: Stop building test binaries as part of the build phase
# Make sure we copy test results from bazel-testlogs symlink to
# a regular directory ./test/test-reports
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CONTINUE_THROUGH_ERROR \
-e PR_LABELS \
-e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
!{{ common.upload_test_reports(name='bazel') }}
!{{ common.upload_test_statistics(build_environment) }}
!{{ common.teardown_ec2_linux() }}
{%- endblock %}

View File

@ -1,186 +0,0 @@
{%- set upload_artifact_s3_action = "seemethere/upload-artifact-s3@v3" -%}
{# squid_proxy is a private ELB that is only available for GHA custom runners #}
{%- set squid_proxy = "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -%}
{# squid_no_proxy is a common set of fixed domains and IPs that we don't need to proxy. See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/http_proxy_config.html#windows-proxy #}
{%- set squid_no_proxy = "localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" -%}
{%- macro concurrency(build_environment) -%}
concurrency:
group: !{{ build_environment }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
{%- endmacro -%}
{%- macro display_ec2_information() -%}
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
{%- endmacro -%}
{%- macro parse_ref() -%}
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
{%- endmacro -%}
{%- macro upload_test_statistics(build_environment) -%}
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: !{{ build_environment }}-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
{%- endmacro -%}
{%- macro setup_ec2_linux() -%}
!{{ display_ec2_information() }}
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
{%- endmacro -%}
{%- macro teardown_ec2_linux() -%}
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
{%- endmacro -%}
{%- macro checkout_pytorch(submodules) -%}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: !{{ submodules }}
{%- endmacro -%}
{%- macro upload_test_reports(name) -%}
- name: Zip test reports for upload
if: always()
env:
{%- if name == 'linux' or name == 'windows' %}
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
{%- else %}
FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
{%- endif %}
{%- if name == 'windows' %}
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
{%- else %}
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
{%- endif %}
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
{%- if name == 'linux' or name == 'windows' %}
name: test-reports-${{ matrix.config }}
{%- else %}
name: test-reports-!{{ name }}
{%- endif %}
retention-days: 14
if-no-files-found: error
path:
{%- if name == 'windows' %}
pytorch-${{ github.run_id }}/test-reports-*.zip
{%- else %}
test-reports-*.zip
{%- endif %}
- uses: !{{ upload_artifact_s3_action }}
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
{%- if name == 'windows' %}
pytorch-${{ github.run_id }}/test-reports-*.zip
{%- else %}
test-reports-*.zip
{%- endif %}
{%- endmacro -%}
{%- macro render_test_results() -%}
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on Windows; just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
{%- endmacro -%}

View File

@ -1,79 +1,55 @@
{% import 'common.yml.j2' as common %}
{%- block name -%}
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: !{{ build_environment }}
{%- endblock %}
name: Linux CI (!{{ build_environment }})
on:
# TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
{%- if on_pull_request %}
pull_request:
{%- if ciflow_config.enabled %}
{%- if ciflow_config.trigger_action_only %}
types: [!{{ ciflow_config.trigger_action }}]
{%- else %}
types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
{%- endif %}
{%- endif %}
{%- else %}
# TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
{%- endif %}
{%- if is_scheduled %}
schedule:
- cron: !{{ is_scheduled }}
{%- else %}
push:
branches:
- master
- release/*
{%- endif %}
workflow_dispatch:
env:
BUILD_ENVIRONMENT: !{{ build_environment }}
DOCKER_IMAGE_BASE: !{{ docker_image_base }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used only for the phase of adding wheel tests; it will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend; see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
!{{ common.concurrency(build_environment) }}
concurrency:
group: !{{ build_environment }}-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
jobs:
{%- if ciflow_config.enabled %}
!{{ ciflow_config.root_job_name }}:
runs-on: ubuntu-18.04
if: ${{ !{{ ciflow_config.root_job_condition }} }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running !{{ ciflow_config.root_job_name }}
- name: print labels
run: echo "${LABELS}"
{%- endif %}
calculate-docker-image:
runs-on: linux.2xlarge
{%- if ciflow_config.enabled %}
needs: [!{{ ciflow_config.root_job_name }}]
{%- endif %}
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
!{{ common.setup_ec2_linux() }}
!{{ common.checkout_pytorch("false") }}
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Calculate docker image tag
id: calculate-tag
run: |
@ -113,78 +89,93 @@ jobs:
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
if: steps.check.outputs.rebuild
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
{% block build +%}
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, !{{ ciflow_config.root_job_name }}]
needs: calculate-docker-image
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: !{{ build_environment }}-build
steps:
!{{ common.setup_ec2_linux() }}
!{{ common.checkout_pytorch("recursive") }}
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
- name: Preserve github env variables for use in docker
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Build PyTorch
run: |
docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
!{{ common.parse_ref() }}
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
# tools/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
run: |
export PYTHONPATH=$PWD
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
pip3 install requests
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
{%- if not is_libtorch %}
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: !{{ common.upload_artifact_s3_action }}
zip -r artifacts.zip dist/ build/
# Upload to github so that people can click and download artifacts
- uses: actions/upload-artifact@v2
# Don't fail on upload to GH since it's only for user convenience
continue-on-error: true
name: Store PyTorch Build Artifacts on Github
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- uses: seemethere/upload-artifact-s3@9d7ceb0ab39c2c88d93ef7792b27425b27d59162
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
@ -192,78 +183,38 @@ jobs:
if-no-files-found: error
path:
artifacts.zip
{%- endif %}
!{{ common.teardown_ec2_linux() }}
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
{%- endblock %}
{%- if not exclude_test %}
{% block test +%}
generate-test-matrix:
runs-on: ubuntu-18.04
{%- if ciflow_config.enabled %}
needs: [!{{ ciflow_config.root_job_name }}]
{%- endif %}
env:
TEST_RUNNER_TYPE: !{{ test_runner_type }}
ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
ENABLE_JIT_LEGACY_TEST: !{{ enable_jit_legacy_test }}
ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
ENABLE_NOGPU_NO_AVX_TEST: !{{ enable_nogpu_no_avx_test }}
ENABLE_NOGPU_NO_AVX2_TEST: !{{ enable_nogpu_no_avx2_test }}
ENABLE_SLOW_TEST: !{{ enable_slow_test }}
ENABLE_DOCS_TEST: !{{ enable_docs_test }}
ENABLE_BACKWARDS_COMPAT_TEST: !{{ enable_backwards_compat_test }}
ENABLE_XLA_TEST: !{{ enable_xla_test }}
ENABLE_NOARCH_TEST: !{{ enable_noarch_test }}
NUM_TEST_SHARDS: !{{ num_test_shards }}
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, !{{ ciflow_config.root_job_name }}]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
runs-on: !{{ test_runner_type }}
needs:
- calculate-docker-image
- build
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: !{{ build_environment }}-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
!{{ common.setup_ec2_linux() }}
!{{ common.checkout_pytorch("recursive") }}
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)/../":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
@ -289,98 +240,134 @@ jobs:
- name: Output disk space left
run: |
sudo df -H
!{{ common.parse_ref() }}
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test PyTorch
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086
container_name=$(docker run \
docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
"${DOCKER_IMAGE}" \
sh -c 'sudo chown -R jenkins . && pip install dist/*.whl && .jenkins/pytorch/test.sh'
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
!{{ common.render_test_results() }}
{%- if is_coverage %}
- name: Report coverage
- uses: actions/upload-artifact@v2
name: Store PyTorch Test Reports
if: always()
with:
name: test-reports
retention-days: 14
if-no-files-found: error
path:
test/**/*.xml
- name: Clean up docker images
if: always()
run: |
python3 -mpip install codecov==2.1.12
python3 -mcodecov
{%- endif %}
!{{ common.upload_test_reports(name='linux') }}
!{{ common.upload_test_statistics(build_environment) }}
!{{ common.teardown_ec2_linux() }}
{% endblock %}
{%- endif -%}
{%- if enable_doc_jobs %}
build-docs:
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
# Prune all of the docker images
docker system prune -af
# this is a separate step from test because the log files from test are too
# long: basically, GitHub tries to render all of the log files when you click
# through an action, causing extreme slowdown on actions that contain too many
# logs (like test); we can always move it back to the other one, but it
# doesn't create the best experience
render_test_results:
if: always()
needs:
- test
runs-on: ubuntu-18.04
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
# deep clone, to allow tools/print_test_stats.py to use Git commands
fetch-depth: 0
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
path: test/test-reports
- uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
# boto3 version copied from .circleci/docker/common/install_conda.sh
run: |
pip install -r requirements.txt
pip install boto3==1.16.34 junitparser rich
- name: Output Test Results (Click Me)
run: |
python tools/render_junit.py test
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_SECRET_ACCESS_KEY }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_JOB: !{{ build_environment }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
run: |
export PYTHONPATH=$PWD
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test
{%- if enable_doc_jobs %}
pytorch_python_doc_build:
runs-on: linux.2xlarge
strategy:
matrix:
docs_type: [cpp, python]
needs: [calculate-docker-image, build, !{{ ciflow_config.root_job_name }}]
needs:
- calculate-docker-image
- build
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
DOCS_TYPE: ${{ matrix.docs_type }}
steps:
!{{ common.setup_ec2_linux() }}
!{{ common.checkout_pytorch("recursive") }}
- name: Log in to ECR
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow sharding to use git rev-list
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
@ -388,64 +375,45 @@ jobs:
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Build ${{ matrix.docs_type }} docs
- name: Build Python Doc in Docker
run: |
set -ex
time docker pull "${DOCKER_IMAGE}" > /dev/null
echo "${GITHUB_REF}"
ref=${GITHUB_REF##*/}
target=${ref//v}
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
docker run \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e IN_CI \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e CIRCLE_SHA1="$GITHUB_SHA" \
-e DOCS_VERSION="${target}" \
-e DOCS_TYPE \
-e PR_LABELS \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--name="$GITHUB_SHA" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" bash -c "sudo chown -R jenkins . && pip install dist/*.whl && ./.circleci/scripts/${DOCS_TYPE}_doc_push_script.sh"
"${DOCKER_IMAGE}" \
bash -c "sudo chown -R jenkins . && pip install dist/*.whl && ./.circleci/scripts/python_doc_push_script.sh docs/$target $target site"
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- uses: !{{ common.upload_artifact_s3_action }}
name: Upload Python Docs Preview
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'python' }}
with:
retention-days: 14
s3-bucket: doc-previews
if-no-files-found: error
path: pytorch.github.io/docs/merge/
s3-prefix: pytorch/${{ github.event.pull_request.number }}
- uses: !{{ common.upload_artifact_s3_action }}
name: Upload C++ Docs Preview
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'cpp' }}
with:
retention-days: 14
if-no-files-found: error
s3-bucket: doc-previews
path: cppdocs/
s3-prefix: pytorch/${{ github.event.pull_request.number }}/cppdocs
- name: Archive artifacts into zip
run: |
zip -r "docs_${DOCS_TYPE}.zip" "${GITHUB_WORKSPACE}/pytorch.github.io" "${GITHUB_WORKSPACE}/cppdocs"
zip -r pytorch_github_io.zip "${GITHUB_WORKSPACE}/pytorch.github.io"
- uses: actions/upload-artifact@v2
name: Store PyTorch Build Artifacts
with:
name: docs_${{ matrix.docs_type }}
path: docs_${{ matrix.docs_type }}.zip
name: pytorch_github_io
if-no-files-found: error
!{{ common.teardown_ec2_linux() }}
{%- endif -%}
path: pytorch_github_io.zip
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
{%- endif -%}

View File

@ -1,43 +1,15 @@
{% import 'common.yml.j2' as common %}
{%- macro wait_and_kill_ssh() -%}
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
{%- endmacro -%}
# Template is at: .github/templates/windows_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: !{{ build_environment }}
name: Windows CI (!{{ build_environment }})
on:
{%- if on_pull_request %}
pull_request:
{%- if ciflow_config.enabled %}
{%- if ciflow_config.trigger_action_only %}
types: [!{{ ciflow_config.trigger_action }}]
{%- else %}
types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
{%- endif %}
{%- endif %}
{%- endif %}
{%- if is_scheduled %}
schedule:
- cron: !{{ is_scheduled }}
{%- else %}
push:
branches:
- master
- release/*
{%- endif %}
workflow_dispatch:
env:
@ -46,56 +18,33 @@ env:
CUDA_VERSION: "!{{ cuda_version }}"
IN_CI: 1
INSTALL_WINDOWS_SDK: 1
JOB_BASE_NAME: test
PYTHON_VERSION: "3.8"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
SCCACHE_BUCKET: "ossci-compiler-cache"
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VS_VERSION: "16.8.6"
VC_YEAR: "2019"
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
no_proxy: !{{ common.squid_no_proxy }}
{%- if cuda_version != "cpu" %}
TORCH_CUDA_ARCH_LIST: "7.0"
USE_CUDA: 1
{%- endif %}
!{{ common.concurrency(build_environment) }}
concurrency:
group: !{{ build_environment }}-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
jobs:
{%- if ciflow_config.enabled %}
!{{ ciflow_config.root_job_name }}:
runs-on: ubuntu-18.04
if: ${{ !{{ ciflow_config.root_job_condition }} }}
steps:
- name: noop
run: echo running !{{ ciflow_config.root_job_name }}
{%- endif %}
build:
runs-on: "windows.4xlarge"
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
{%- if ciflow_config.enabled %}
needs: [!{{ ciflow_config.root_job_name }}]
{%- endif %}
env:
JOB_BASE_NAME: !{{ build_environment }}-build
http_proxy: "!{{ common. squid_proxy }}"
https_proxy: "!{{ common.squid_proxy }}"
steps:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
!{{ common.display_ec2_information() }}
- name: Clean workspace (including things in .gitignore)
shell: bash
run: |
git clean -xdf
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
@ -112,8 +61,6 @@ jobs:
{%- endif %}
- name: Build
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
.jenkins/pytorch/win-build.sh
# Upload to github so that people can click and download artifacts
@ -126,86 +73,31 @@ jobs:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
path: C:\w\build-results
- name: Upload artifacts to s3
if: always()
uses: !{{ common.upload_artifact_s3_action }}
uses: seemethere/upload-artifact-s3@9d7ceb0ab39c2c88d93ef7792b27425b27d59162
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
!{{ wait_and_kill_ssh() }}
- name: Cleanup build-results and workspaces
if: always()
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
rm -rf ./*
generate-test-matrix:
{%- if ciflow_config.enabled %}
needs: [!{{ ciflow_config.root_job_name }}]
{%- endif %}
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: !{{ test_runner_type }}
NUM_TEST_SHARDS: !{{ num_test_shards }}
NUM_TEST_SHARDS_ON_PULL_REQUEST: !{{ num_test_shards_on_pull_request }}
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
path: C:\w\build-results
test:
{%- if only_build_on_pull_request %}
if: ${{ github.event_name == 'push' }}
{%- endif %}
runs-on: !{{ test_runner_type }}
env:
JOB_BASE_NAME: !{{ build_environment }}-test
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: ${{ matrix.config }}
http_proxy: "!{{ common.squid_proxy }}"
https_proxy: "!{{ common.squid_proxy }}"
RUN_SMOKE_TESTS_ONLY_ON_PR: !{{ only_run_smoke_tests_on_pull_request }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
needs: [build, generate-test-matrix, !{{ ciflow_config.root_job_name }}]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
needs:
- build
steps:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
!{{ common.display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Clean workspace (including things in .gitignore)
shell: bash
run: |
git clean -xdf
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
@ -234,26 +126,71 @@ jobs:
name: Setup Python3
with:
python-version: '3.x'
- name: Test
- name: Run test scripts
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
export RUN_SMOKE_TESTS_ONLY=1
fi
.jenkins/pytorch/win-test.sh
!{{ common.upload_test_reports(name='windows') }}
!{{ common.render_test_results() }}
!{{ wait_and_kill_ssh() }}
!{{ common.parse_ref() }}
!{{ common.upload_test_statistics(build_environment) }}
- name: Cleanup workspace
- uses: actions/upload-artifact@v2
name: Store PyTorch Test Reports
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
with:
name: test-reports
retention-days: 14
if-no-files-found: error
path:
test/**/*.xml
# this is a separate step from test because the log files from test are too
# long: basically, GitHub tries to render all of the log files when you click
# through an action, causing extreme slowdown on actions that contain too many
# logs (like test); we can always move it back to the other one, but it
# doesn't create the best experience
render_test_results:
if: always()
needs:
- test
runs-on: ubuntu-18.04
# TODO: Make this into a composite step
steps:
- name: Checkout PyTorch
uses: actions/checkout@v2
with:
# deep clone, to allow tools/print_test_stats.py to use Git commands
fetch-depth: 0
- uses: actions/download-artifact@v2
name: Download PyTorch Test Reports
with:
name: test-reports
path: test/test-reports
- uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
# boto3 version copied from .circleci/docker/common/install_conda.sh
run: |
rm -rf ./*
pip install -r requirements.txt
pip install boto3==1.16.34 junitparser rich
- name: Output Test Results (Click Me)
run: |
python tools/render_junit.py test
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_SECRET_ACCESS_KEY }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_JOB: !{{ build_environment }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
run: |
export PYTHONPATH=$PWD
python tools/print_test_stats.py --upload-to-s3 --compare-with-s3 test

.github/workflows/add_annotations.yml
View File

@ -0,0 +1,66 @@
name: Add annotations
on:
workflow_run:
types:
- completed
workflows:
- Lint
jobs:
annotate:
strategy:
fail-fast: false
matrix:
name:
- flake8-py3
- clang-tidy
runs-on: ubuntu-18.04
steps:
- name: Download artifact
uses: actions/github-script@v3
env:
RUN_ID: ${{ github.event.workflow_run.id }}
LINT_NAME: ${{ matrix.name }}
with:
# https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
script: |
const artifacts = await github.actions.listWorkflowRunArtifacts({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: process.env.RUN_ID,
});
const filteredArtifacts = artifacts.data.artifacts.filter(artifact => {
return artifact.name == process.env.LINT_NAME;
});
if (filteredArtifacts.length > 0) {
const matchArtifact = filteredArtifacts[0];
const download = await github.actions.downloadArtifact({
owner: context.repo.owner,
repo: context.repo.repo,
artifact_id: matchArtifact.id,
archive_format: 'zip',
});
const fs = require('fs');
fs.writeFileSync(
`${process.env.GITHUB_WORKSPACE}/linter-output.zip`,
Buffer.from(download.data),
);
}
- name: Unzip artifact
id: unzip
run: |
if unzip linter-output.zip annotations.json commit-sha.txt; then
echo ::set-output \
name=sha::"$(grep -Em1 '^[[:xdigit:]]{40}$' commit-sha.txt)"
fi
- if: steps.unzip.outputs.sha
name: Add annotations
uses: pytorch/add-annotations-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
check_name: ${{ matrix.name }}
linter_output_path: annotations.json
commit_sha: ${{ steps.unzip.outputs.sha }}
mode: json

View File

@ -6,15 +6,8 @@ on:
pull_request_target:
types: [edited, opened, synchronize, reopened]
concurrency:
group: auto-label-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
auto-label-rocm:
if: ${{ github.repository == 'pytorch/pytorch' }}
runs-on: ubuntu-18.04
steps:
- name: Retrieve information

View File

@ -16,7 +16,7 @@ jobs:
image: python:3.9
steps:
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
- name: Generating build matrix
id: set-matrix
run: |
@ -57,12 +57,12 @@ jobs:
- name: Clean runner workspace
run: rm -rf "$GITHUB_WORKSPACE"
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
path: pytorch
submodules: recursive
- name: Clone pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
repository: pytorch/builder
path: builder
@ -91,25 +91,23 @@ jobs:
with:
name: pytorch-conda-py${{ matrix.python_version }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
path: /remote/**/*.bz2
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
# tools/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
run: |
export PYTHONPATH=$PWD
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
pip3 install requests
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
concurrency:
group: build-linux-conda-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
group: build-linux-conda-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true

View File

@ -16,7 +16,7 @@ jobs:
image: python:3.9
steps:
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
- name: Generating build matrix
id: set-matrix
run: |
@ -51,12 +51,12 @@ jobs:
- name: Clean runner workspace
run: rm -rf "$GITHUB_WORKSPACE"
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
path: pytorch
submodules: recursive
- name: Clone pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
repository: pytorch/builder
path: builder
@ -90,25 +90,23 @@ jobs:
with:
name: pytorch-libtorch-${{ matrix.libtorch_variant }}-${{ matrix.devtoolset }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
path: /remote/**/*.zip
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
# tools/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
run: |
export PYTHONPATH=$PWD
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
pip3 install requests
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
concurrency:
group: build-linux-libtorch-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
group: build-linux-libtorch-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true

View File

@ -16,7 +16,7 @@ jobs:
image: python:3.9
steps:
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
- name: Generating build matrix
id: set-matrix
run: |
@ -46,12 +46,12 @@ jobs:
- name: Clean runner workspace
run: rm -rf "$GITHUB_WORKSPACE"
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
path: pytorch
submodules: recursive
- name: Clone pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
uses: actions/checkout@v2
with:
repository: pytorch/builder
path: builder
@ -89,25 +89,23 @@ jobs:
with:
name: pytorch-wheel-py${{ matrix.python_version }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
path: /remote/**/*.whl
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
# tools/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
CIRCLE_WORKFLOW_ID: ${{ github.run_id }} # dunno if this corresponds
run: |
export PYTHONPATH=$PWD
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
pip3 install requests
python3 .circleci/scripts/upload_binary_size_to_scuba.py || exit 0
concurrency:
group: build-linux-wheels-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
group: build-linux-wheels-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true

48 .github/workflows/clang_format.yml vendored Normal file
View File

@@ -0,0 +1,48 @@
name: clang-format
on:
pull_request:
jobs:
clang-format:
runs-on: ubuntu-18.04
steps:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.x
architecture: x64
- name: Fetch PyTorch
uses: actions/checkout@v2
with:
fetch-depth: 0 # deep clone, to allow us to use git merge-base
- name: Run clang-format
env:
BASE_SHA: ${{ github.event.pull_request.base.sha }}
run: |
set -eu
# This is necessary to get the same results regardless of whether the
# PR was opened directly or from a forked repo. See: `9f890a92` for more info.
git remote add upstream https://github.com/pytorch/pytorch
git fetch upstream "$GITHUB_BASE_REF"
# only run clang-format on allowlisted files
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
echo "| clang-format failures found! Run: "
echo "| tools/clang_format_ci.sh ${BASE_SHA} "
echo "| to fix this error. "
echo "| For more info, see: https://github.com/pytorch/pytorch/wiki/clang-format "
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
tools/clang_format_ci.sh "${BASE_SHA}"
GIT_DIFF=$(git diff)
if [[ -z $GIT_DIFF ]]; then
exit 0
fi
echo "$GIT_DIFF"
exit 1
concurrency:
group: clang-format-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
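The job above is a standard "format, then fail on a dirty tree" gate: run the formatter against the PR's merge base and treat any resulting git diff as a failure. A hedged local equivalent, assuming the repo's tools/clang_format_ci.sh wrapper and an upstream remote configured as in the workflow:
set -eu
git fetch upstream master
BASE_SHA=$(git merge-base HEAD upstream/master)  # local stand-in for the PR base SHA
tools/clang_format_ci.sh "${BASE_SHA}"
if git diff --quiet; then
  echo "clang-format clean"
else
  git diff   # show exactly what the formatter changed
  exit 1     # a dirty tree means formatting failures, as in CI
fi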

View File

@@ -1,53 +0,0 @@
name: Create Release
on:
push:
tags: ['v*']
branches: [master]
release:
types: [published]
pull_request:
paths: [.github/workflows/create_release.yml]
jobs:
release:
if: ${{ github.repository == 'pytorch/pytorch' }}
name: Create Release
runs-on: ubuntu-latest
steps:
- uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: 'recursive'
- name: Fake name for PRs
if: ${{ github.event_name == 'pull_request' }}
run: echo "PT_GITHUB_REF=refs/tags/pr-tag" >> "$GITHUB_ENV"
- name: Real name for non-PRs
if: ${{ github.event_name != 'pull_request' }}
run: echo "PT_GITHUB_REF=$GITHUB_REF" >> "$GITHUB_ENV"
- name: Set filenames
run: |
tag_or_branch="${PT_GITHUB_REF#refs/tags/}"
tag_or_branch="${tag_or_branch#refs/heads/}"
echo "PT_RELEASE_NAME=pytorch-$tag_or_branch" >> "$GITHUB_ENV"
echo "PT_RELEASE_FILE=pytorch-$tag_or_branch.tar.gz" >> "$GITHUB_ENV"
- name: Create source distribution
run: |
# Create new folder with specified name so extracting the archive yields that folder name
rm -rf "/tmp/$PT_RELEASE_NAME"
cp -r "$PWD" "/tmp/$PT_RELEASE_NAME"
mv "/tmp/$PT_RELEASE_NAME" .
# Cleanup
rm -r "$PT_RELEASE_NAME"/{.azure_pipelines,.circleci,.jenkins}
find "$PT_RELEASE_NAME" -name '.git*' -exec rm -rv {} \; || true
# Create archive
tar -czf "$PT_RELEASE_FILE" "$PT_RELEASE_NAME"
echo "Created source archive $PT_RELEASE_FILE with content: $(ls -a "$PT_RELEASE_NAME")"
- name: Upload source distribution
if: ${{ github.event_name == 'release' }}
uses: softprops/action-gh-release@v1
with:
files: ${{env.PT_RELEASE_FILE}}
concurrency:
group: create-release-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
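The "Set filenames" step above leans on bash prefix stripping: ${VAR#pattern} removes a leading pattern when it matches and otherwise leaves the value alone, so a single pipeline handles tag refs, branch refs, and the fake PR tag alike. A small runnable illustration with hypothetical refs:
for ref in refs/tags/v1.9.0 refs/heads/master refs/tags/pr-tag; do
  tag_or_branch="${ref#refs/tags/}"             # strips a tag prefix, if present
  tag_or_branch="${tag_or_branch#refs/heads/}"  # strips a branch prefix, if present
  echo "pytorch-${tag_or_branch}.tar.gz"
done
# -> pytorch-v1.9.0.tar.gz, pytorch-master.tar.gz, pytorch-pr-tag.tar.gz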

View File

@@ -1,283 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: libtorch-linux-xenial-cuda10.2-py3.6-gcc7
on:
pull_request:
types: [unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: libtorch-linux-xenial-cuda10.2-py3.6-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: libtorch-linux-xenial-cuda10.2-py3.6-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/libtorch') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: libtorch-linux-xenial-cuda10.2-py3.6-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
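A note on the "Calculate docker image tag" / "Check if image should be built" pair that recurs in these generated workflows: the tag is the git tree hash of .circleci/docker, so images are content-addressed and a rebuild happens only when that directory's contents change. A condensed sketch of the decision logic, assuming a full clone (fetch-depth: 0) so merge-base resolves:
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)   # tree hash of the Docker context
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
  echo "image already exists, skipping build"
else
  MERGE_BASE=$(git merge-base HEAD "${BASE_REVISION}")
  PREVIOUS_DOCKER_TAG=$(git rev-parse "${MERGE_BASE}:.circleci/docker")
  if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
    echo "ERROR: context unchanged but the base image is missing" >&2
    exit 1   # a prior image was deleted; silently rebuilding could mask the problem
  fi
  echo "context changed since ${MERGE_BASE}, rebuilding"
fi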

View File

@@ -1,283 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: libtorch-linux-xenial-cuda11.3-py3.6-gcc7
on:
pull_request:
types: [unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: libtorch-linux-xenial-cuda11.3-py3.6-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: libtorch-linux-xenial-cuda11.3-py3.6-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/libtorch') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: libtorch-linux-xenial-cuda11.3-py3.6-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
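The inline retry helper that appears throughout these jobs tries a command up to three times, sleeping 1s and then 2s between attempts. It generalizes to a configurable attempt count with doubling backoff; a sketch of that generalization (new code, not taken from the workflow):
retry () {
  local attempts=${RETRY_ATTEMPTS:-3} delay=1 i
  for ((i = 1; i <= attempts; i++)); do
    if "$@"; then
      return 0
    fi
    if (( i < attempts )); then
      sleep "${delay}"         # back off before the next attempt
      delay=$((delay * 2))
    fi
  done
  return 1                     # every attempt failed
}
retry docker pull "${ALPINE_IMAGE}"   # same call site as in the jobs above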

View File

@@ -1,562 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-bionic-cuda10.2-py3.9-gcc7
on:
pull_request:
types: [unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-bionic-cuda10.2-py3.9-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-bionic-cuda10.2-py3.9-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository_owner == 'pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/slow'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-cuda10.2-py3.9-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-cuda10.2-py3.9-gcc7-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on Windows; just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-bionic-cuda10.2-py3.9-gcc7-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
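The test job above fans out through SHARD_NUMBER and NUM_TEST_SHARDS from the generated matrix; the actual partitioning lives in .github/scripts/generate_pytorch_test_matrix.py and the test scripts, neither reproduced here. As a purely hypothetical illustration of the contract, a round-robin split over made-up test files:
NUM_TEST_SHARDS=2
SHARD_NUMBER=1   # 1-based, as in the workflow env; 0 means "run everything"
i=0
for t in test_autograd test_nn test_jit test_torch; do   # hypothetical test list
  if (( SHARD_NUMBER == 0 || i % NUM_TEST_SHARDS == SHARD_NUMBER - 1 )); then
    echo "shard ${SHARD_NUMBER} runs ${t}"
  fi
  i=$((i + 1))
done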

View File

@@ -1,562 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-bionic-py3.6-clang9
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-bionic-py3.6-clang9
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.6-clang9
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-bionic-py3.6-clang9-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/noarch') || contains(github.event.pull_request.labels.*.name, 'ciflow/xla'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-py3.6-clang9-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: ''
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: 1
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-py3.6-clang9-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
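# On trunk pushes and scheduled runs, keep running after test failures so one red test doesn't mask the rest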
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
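# "aws ecr get-login" (CLI v1) prints a ready-to-run "docker login" command; execute it, then remove the temp file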
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
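# Snapshot GITHUB_* variables; the containers launched below pick them up via --env-file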
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
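# CUDA and ROCm jobs need more /dev/shm than docker's 64m default (tests use shared memory heavily, e.g. DataLoader workers)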
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
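# Anything other than the standard 2-shard split runs the whole suite as a single shard 0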
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice with quoting
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
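# Fix ownership inside the container, install the wheel built earlier, then run the chosen test script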
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on Windows; just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-bionic-py3.6-clang9-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,566 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-bionic-py3.8-gcc9-coverage
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-bionic-py3.8-gcc9-coverage
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.8-gcc9
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-bionic-py3.8-gcc9-coverage-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot') || (github.event.action != 'unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/coverage') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
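# The tag is the git tree hash of .circleci/docker, so it changes only when the docker build files do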
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable to trees that don't have `.circleci/docker` at their merge base, e.g. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
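# No reusable image was found, so signal the next step to rebuild and push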
echo "::set-output name=rebuild::yes"
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-py3.8-gcc9-coverage-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-bionic-py3.8-gcc9-coverage-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice with quoting
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on Windows; just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Report coverage
run: |
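# Upload the coverage data collected by this coverage build's test run to Codecov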
python3 -m pip install codecov==2.1.12
python3 -m codecov
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-bionic-py3.8-gcc9-coverage-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,562 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-cuda10.2-py3.6-gcc7
on:
pull_request:
types: [unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-cuda10.2-py3.6-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-xenial-cuda10.2-py3.6-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot') || (github.event.action != 'unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/slow'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable to trees that don't have `.circleci/docker` at their merge base, e.g. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo "::set-output name=rebuild::yes"
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-cuda10.2-py3.6-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: 1
ENABLE_MULTIGPU_TEST: 1
ENABLE_NOGPU_NO_AVX_TEST: 1
ENABLE_NOGPU_NO_AVX2_TEST: 1
ENABLE_SLOW_TEST: 1
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-cuda10.2-py3.6-gcc7-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
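# Retry the pull up to 3 times with a short backoff to ride out transient network/ECR errors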
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
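# GPU runner AMIs may not ship the NVIDIA driver; install it and expose every GPU to the containers below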
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG since that doesn't play nice with quoting
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Encoding is weird on Windows; just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-cuda10.2-py3.6-gcc7-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,562 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-cuda11.3-py3.6-gcc7
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-cuda11.3-py3.6-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-xenial-cuda11.3-py3.6-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot') || (github.event.action != 'unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable to trees that don't have `.circleci/docker` at their merge base, e.g. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo "::set-output name=rebuild::yes"
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
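# True only for push/schedule events on pytorch/pytorch, so trunk runs keep going past test failures while PR runs fail fast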
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
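# Docker's default /dev/shm is 64MB; CUDA/ROCm test runs need more since
# PyTorch DataLoader workers pass tensors through shared memory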
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
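# Only the 2-way-sharded configs use per-shard numbering; everything else runs the full suite as shard 0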
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# SC2086 is disabled below because GPU_FLAG must expand unquoted (word splitting is intentional)
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Console encoding is unreliable on Windows, so default to UTF-8 where possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
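# -i '*.xml' restricts the archive to the JUnit XML reports under test/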
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-cuda11.3-py3.6-gcc7-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,709 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-py3.6-gcc5.4
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-py3.6-gcc5.4
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used only while wheel tests are being added; it will be removed once that work is complete
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend; see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-xenial-py3.6-gcc5.4-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
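# One in-flight run per PR (or per commit on push); workflow_dispatch runs fall into a separate group so manual runs aren't cancelled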
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository_owner == 'pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
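# ciflow gate: skips only the pytorchbot-driven 'unassigned' probe events that lack a matching ciflow/* label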
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
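# Tag = git tree hash of .circleci/docker, so the image tag changes exactly when the Docker build context changes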
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
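# Rebuild policy: skip when the tag already exists in ECR; otherwise compare against the
# merge-base's .circleci/docker tree hash to catch images that were never built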
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree;
# only really applicable to trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
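# Strip the ECR registry/namespace prefix -- presumably the bare image name that build_docker.sh expects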
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.6-gcc5.4-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: 1
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: 1
ENABLE_BACKWARDS_COMPAT_TEST: 1
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.6-gcc5.4-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# SC2086 is disabled below because GPU_FLAG must expand unquoted (word splitting is intentional)
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Console encoding is unreliable on Windows, so default to UTF-8 where possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.6-gcc5.4-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
build-docs:
runs-on: linux.2xlarge
strategy:
matrix:
docs_type: [cpp, python]
needs: [calculate-docker-image, build, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
DOCS_TYPE: ${{ matrix.docs_type }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Build ${{ matrix.docs_type }} docs
run: |
set -ex
time docker pull "${DOCKER_IMAGE}" > /dev/null
echo "${GITHUB_REF}"
ref=${GITHUB_REF##*/}
target=${ref//v}
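# e.g. refs/tags/v1.9.0 -> ref=v1.9.0 -> target=1.9.0; note ${ref//v} deletes every 'v', not just a leading one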
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e IN_CI \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e CIRCLE_SHA1="$GITHUB_SHA" \
-e DOCS_VERSION="${target}" \
-e DOCS_TYPE \
-e PR_LABELS \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" bash -c "sudo chown -R jenkins . && pip install dist/*.whl && ./.circleci/scripts/${DOCS_TYPE}_doc_push_script.sh"
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- uses: seemethere/upload-artifact-s3@v3
name: Upload Python Docs Preview
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'python' }}
with:
retention-days: 14
s3-bucket: doc-previews
if-no-files-found: error
path: pytorch.github.io/docs/merge/
s3-prefix: pytorch/${{ github.event.pull_request.number }}
- uses: seemethere/upload-artifact-s3@v3
name: Upload C++ Docs Preview
if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'cpp' }}
with:
retention-days: 14
if-no-files-found: error
s3-bucket: doc-previews
path: cppdocs/
s3-prefix: pytorch/${{ github.event.pull_request.number }}/cppdocs
- name: Archive artifacts into zip
run: |
zip -r "docs_${DOCS_TYPE}.zip" "${GITHUB_WORKSPACE}/pytorch.github.io" "${GITHUB_WORKSPACE}/cppdocs"
- uses: actions/upload-artifact@v2
name: Store PyTorch Build Artifacts
with:
name: docs_${{ matrix.docs_type }}
path: docs_${{ matrix.docs_type }}.zip
if-no-files-found: error
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,367 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/bazel_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-py3.6-gcc7-bazel-test
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-py3.6-gcc7-bazel-test
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used only while wheel tests are being added; it will be removed once that work is complete
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend; see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: linux-xenial-py3.6-gcc7-bazel-test-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/bazel') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree;
# only really applicable to trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
# building and testing in a single job since bazel runs only a small subset of tests
build-and-test:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: linux-xenial-py3.6-gcc7-bazel-test-build-and-test
NUM_TEST_SHARDS: 1
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- name: Output disk space left
run: |
sudo df -H
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e PR_LABELS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Test
run: |
# detached container should get cleaned up by teardown_ec2_linux
export SHARD_NUMBER=0
# TODO: Stop building test binaries as part of the build phase
# Make sure we copy test results from bazel-testlogs symlink to
# a regular directory ./test/test-reports
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e CONTINUE_THROUGH_ERROR \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: 'bazel-${{ github.job }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-bazel
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: linux-xenial-py3.6-gcc7-bazel-test-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,562 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: parallelnative-linux-xenial-py3.6-gcc5.4
on:
pull_request:
types: [unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: parallelnative-linux-xenial-py3.6-gcc5.4
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
# This is used only while wheel tests are being added; it will be removed once that work is complete
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend; see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: parallelnative-linux-xenial-py3.6-gcc5.4-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree;
# only really applicable to trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: parallelnative-linux-xenial-py3.6-gcc5.4-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
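        # zip -1 picks the fastest compression level; the archive is consumed by the test jobs below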
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.2xlarge
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 1
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: parallelnative-linux-xenial-py3.6-gcc5.4-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
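      # Only continue past test failures on trunk runs (push/schedule) in pytorch/pytorch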
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
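        # Writing to GITHUB_ENV persists GPU_FLAG into the environment of later steps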
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
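        # With a single shard, SHARD_NUMBER=0 presumably signals test.sh to run the whole suite rather than one slice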
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
        # Used for GPU_FLAG since that doesn't play nice with quoting
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
      # Encoding is weird on Windows, so just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: parallelnative-linux-xenial-py3.6-gcc5.4-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,281 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7
on:
pull_request:
types: [unassigned]
schedule:
- cron: 45 0,4,8,12,16,20 * * *
workflow_dispatch:
env:
BUILD_ENVIRONMENT: periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
  # Only used while wheel tests are being added; will be removed once that work is complete
IN_WHEEL_TEST: 1
  # Used for custom_operator, jit_hooks, custom_backend; see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
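  # One in-flight run per PR (or per commit on pushes); workflow_dispatch runs are grouped separately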
group: periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/libtorch') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/scheduled'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
        # this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
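        # For illustration only: DOCKER_TAG is the tree hash of .circleci/docker, so the
        # image is rebuilt only when something under that directory changes. A hypothetical
        # session (hashes made up):
        #   $ git rev-parse HEAD:.circleci/docker
        #   6deee8b3f1c...
        #   $ git rev-parse "$MERGE_BASE:.circleci/docker"
        #   6deee8b3f1c...   # identical tree hash, so the tagged image should already exist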
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af

View File

@ -1,560 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: periodic-linux-xenial-cuda11.1-py3.6-gcc7
on:
pull_request:
types: [unassigned]
schedule:
- cron: 45 0,4,8,12,16,20 * * *
workflow_dispatch:
env:
BUILD_ENVIRONMENT: periodic-linux-xenial-cuda11.1-py3.6-gcc7
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
  # Only used while wheel tests are being added; will be removed once that work is complete
IN_WHEEL_TEST: 1
  # Used for custom_operator, jit_hooks, custom_backend; see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
concurrency:
group: periodic-linux-xenial-cuda11.1-py3.6-gcc7-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/scheduled'))) }}
env:
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
calculate-docker-image:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
env:
DOCKER_BUILDKIT: 1
timeout-minutes: 90
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: false
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
        # this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
DOCKER_SKIP_S3_UPLOAD: 1
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
cd .circleci/docker && ./build_docker.sh
build:
runs-on: linux.2xlarge
needs: [calculate-docker-image, ciflow_should_run]
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.6-gcc7-build
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Build
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
IS_GHA: 1
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Archive artifacts into zip
run: |
zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
- uses: seemethere/upload-artifact-s3@v3
name: Store PyTorch Build Artifacts on S3
with:
name: ${{ env.BUILD_ENVIRONMENT }}
retention-days: 14
if-no-files-found: error
path:
artifacts.zip
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af
generate-test-matrix:
runs-on: ubuntu-18.04
needs: [ciflow_should_run]
env:
TEST_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
ENABLE_DISTRIBUTED_TEST: 1
ENABLE_JIT_LEGACY_TEST: ''
ENABLE_MULTIGPU_TEST: ''
ENABLE_NOGPU_NO_AVX_TEST: ''
ENABLE_NOGPU_NO_AVX2_TEST: ''
ENABLE_SLOW_TEST: ''
ENABLE_DOCS_TEST: ''
ENABLE_BACKWARDS_COMPAT_TEST: ''
ENABLE_XLA_TEST: ''
ENABLE_NOARCH_TEST: ''
NUM_TEST_SHARDS: 2
MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
NOGPU_RUNNER_TYPE: linux.2xlarge
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
needs: [calculate-docker-image, build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
env:
DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.6-gcc7-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
bash /tmp/ecr-login.sh
rm /tmp/ecr-login.sh
- name: Chown workspace
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
# Ensure the working directory gets chowned back to the current user
retry docker pull "${ALPINE_IMAGE}"
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Pull docker image
run: |
docker pull "${DOCKER_IMAGE}"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
run: |
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
- name: Determine shm-size
run: |
shm_size="1g"
case "${BUILD_ENVIRONMENT}" in
*cuda*)
shm_size="2g"
;;
*rocm*)
shm_size="8g"
;;
esac
echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
- name: Unzip artifacts
run: |
unzip -o artifacts.zip
- name: Output disk space left
run: |
sudo df -H
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Test
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
IS_GHA: 1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
AWS_DEFAULT_REGION: us-east-1
run: |
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
else
TEST_COMMAND=.jenkins/pytorch/test.sh
fi
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
        # Used for GPU_FLAG since that doesn't play nice with quoting
# shellcheck disable=SC2086
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e GITHUB_ACTIONS \
-e IN_CI \
-e IS_GHA \
-e CIRCLE_BRANCH \
-e CIRCLE_SHA1 \
-e CIRCLE_PR_NUMBER \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e JOB_BASE_NAME \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e PYTORCH_IGNORE_DISABLED_ISSUES \
-e PR_LABELS \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="${SHM_SIZE}" \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
      # Encoding is weird on Windows, so just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
test-reports-*.zip
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: periodic-linux-xenial-cuda11.1-py3.6-gcc7-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
env:
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af

View File

@ -1,314 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/windows_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: periodic-win-vs2019-cuda11.1-py3
on:
pull_request:
types: [unassigned]
schedule:
- cron: 45 0,4,8,12,16,20 * * *
workflow_dispatch:
env:
BUILD_ENVIRONMENT: periodic-win-vs2019-cuda11.1-py3
BUILD_WHEEL: 1
CUDA_VERSION: "11.1"
IN_CI: 1
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: "3.8"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
SCCACHE_BUCKET: "ossci-compiler-cache"
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VS_VERSION: "16.8.6"
VC_YEAR: "2019"
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
no_proxy: localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock
TORCH_CUDA_ARCH_LIST: "7.0"
USE_CUDA: 1
concurrency:
group: periodic-win-vs2019-cuda11.1-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/scheduled') || contains(github.event.pull_request.labels.*.name, 'ciflow/win'))) }}
steps:
- name: noop
run: echo running ciflow_should_run
build:
runs-on: "windows.4xlarge"
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
needs: [ciflow_should_run]
env:
JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-build
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
steps:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install Cuda
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install Cudnn
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- name: Build
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
.jenkins/pytorch/win-build.sh
    # Upload to GitHub so that people can click and download artifacts
- name: Upload artifacts to Github
if: always()
uses: actions/upload-artifact@v2
# Don't fail on upload to GH since it's only for user convenience
continue-on-error: true
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Upload artifacts to s3
if: always()
uses: seemethere/upload-artifact-s3@v3
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Cleanup build-results and workspaces
if: always()
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
rm -rf ./*
generate-test-matrix:
needs: [ciflow_should_run]
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2
NUM_TEST_SHARDS_ON_PULL_REQUEST: 2
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
env:
JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: ${{ matrix.config }}
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      RUN_SMOKE_TESTS_ONLY_ON_PR: "false"
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
needs: [build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
steps:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install Cuda
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install Cudnn
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
export RUN_SMOKE_TESTS_ONLY=1
fi
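        # GITHUB_HEAD_REF is only set for pull_request-triggered runs, so this gate can only fire on PRs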
.jenkins/pytorch/win-test.sh
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
      # Encoding is weird on Windows, so just try to default to utf-8 if possible
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: periodic-win-vs2019-cuda11.1-py3-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*

View File

@ -1,298 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/windows_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: win-vs2019-cpu-py3
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: win-vs2019-cpu-py3
BUILD_WHEEL: 1
CUDA_VERSION: "cpu"
IN_CI: 1
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: "3.8"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
SCCACHE_BUCKET: "ossci-compiler-cache"
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VS_VERSION: "16.8.6"
VC_YEAR: "2019"
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
no_proxy: localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock
concurrency:
group: win-vs2019-cpu-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository_owner == 'pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/win'))) }}
steps:
- name: noop
run: echo running ciflow_should_run
build:
runs-on: "windows.4xlarge"
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
needs: [ciflow_should_run]
env:
JOB_BASE_NAME: win-vs2019-cpu-py3-build
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
steps:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Build
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
.jenkins/pytorch/win-build.sh
    # Upload to GitHub so that people can click and download artifacts
- name: Upload artifacts to Github
if: always()
uses: actions/upload-artifact@v2
# Don't fail on upload to GH since it's only for user convenience
continue-on-error: true
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Upload artifacts to s3
if: always()
uses: seemethere/upload-artifact-s3@v3
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Cleanup build-results and workspaces
if: always()
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
rm -rf ./*
generate-test-matrix:
needs: [ciflow_should_run]
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.4xlarge
NUM_TEST_SHARDS: 2
NUM_TEST_SHARDS_ON_PULL_REQUEST: 2
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generating test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
env:
JOB_BASE_NAME: win-vs2019-cpu-py3-test
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: ${{ matrix.config }}
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
      RUN_SMOKE_TESTS_ONLY_ON_PR: "false"
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
needs: [build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
steps:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
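# (Assumption) SHARD_NUMBER=0 is the sentinel win-test.sh reads as "run the
# whole suite unsharded" when the matrix used a shard count other than 2.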
if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
export RUN_SMOKE_TESTS_ONLY=1
fi
.jenkins/pytorch/win-test.sh
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
shell: powershell
run: |
# -ir => recursive include all files in pattern
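# e.g. -ir'!test\*.xml' matches test\result.xml as well as nested files
# such as test\cpp\api\result.xml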
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Console encoding is unpredictable on Windows; try to default to utf-8
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: win-vs2019-cpu-py3-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*

View File

@@ -1,316 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/windows_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: win-vs2019-cuda10.2-py3
on:
pull_request:
types: [unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: win-vs2019-cuda10.2-py3
BUILD_WHEEL: 1
CUDA_VERSION: "10.2"
IN_CI: 1
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: "3.8"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
SCCACHE_BUCKET: "ossci-compiler-cache"
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VS_VERSION: "16.8.6"
VC_YEAR: "2019"
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
no_proxy: localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock
TORCH_CUDA_ARCH_LIST: "7.0"
USE_CUDA: 1
concurrency:
group: win-vs2019-cuda10.2-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
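# Illustrative: a push to PR #123 resolves the group to
# "win-vs2019-cuda10.2-py3-123-false", so a newer push to the same PR cancels
# the run already in flight.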
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository == 'pytorch/pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/win'))) }}
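# Rough reading of the gate: non-pull_request events always pass; for the
# label-driven pull_request "unassigned" trigger, any of the ciflow/all,
# ciflow/cuda, or ciflow/win labels lets the downstream build/test jobs
# (which "need" this job) proceed.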
steps:
- name: noop
run: echo running ciflow_should_run
build:
runs-on: "windows.4xlarge"
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
needs: [ciflow_should_run]
env:
JOB_BASE_NAME: win-vs2019-cuda10.2-py3-build
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
steps:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install CUDA
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install cuDNN
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- name: Build
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
.jenkins/pytorch/win-build.sh
# Upload to GitHub so that people can click and download artifacts
- name: Upload artifacts to GitHub
if: always()
uses: actions/upload-artifact@v2
# Don't fail on upload to GH since it's only for user convenience
continue-on-error: true
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Upload artifacts to S3
if: always()
uses: seemethere/upload-artifact-s3@v3
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Cleanup build-results and workspaces
if: always()
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
rm -rf ./*
generate-test-matrix:
needs: [ciflow_should_run]
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2
NUM_TEST_SHARDS_ON_PULL_REQUEST: 2
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generate test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
env:
JOB_BASE_NAME: win-vs2019-cuda10.2-py3-test
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: ${{ matrix.config }}
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
RUN_SMOKE_TESTS_ONLY_ON_PR: "false"
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
needs: [build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
steps:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install CUDA
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install cuDNN
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
export RUN_SMOKE_TESTS_ONLY=1
fi
.jenkins/pytorch/win-test.sh
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Console encoding is unpredictable on Windows; try to default to utf-8
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: win-vs2019-cuda10.2-py3-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*

View File

@@ -1,316 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/windows_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: win-vs2019-cuda11.3-py3
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: win-vs2019-cuda11.3-py3
BUILD_WHEEL: 1
CUDA_VERSION: "11.3"
IN_CI: 1
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: "3.8"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
SCCACHE_BUCKET: "ossci-compiler-cache"
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VS_VERSION: "16.8.6"
VC_YEAR: "2019"
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
no_proxy: localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock
TORCH_CUDA_ARCH_LIST: "7.0"
USE_CUDA: 1
concurrency:
group: win-vs2019-cuda11.3-py3-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
if: ${{ (github.repository_owner == 'pytorch') && ((github.event_name != 'pull_request') || (github.event.assignee.login != 'pytorchbot' ) || (github.event.action !='unassigned') || (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cuda') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/win'))) }}
steps:
- name: noop
run: echo running ciflow_should_run
build:
runs-on: "windows.4xlarge"
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
needs: [ciflow_should_run]
env:
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-build
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
steps:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install CUDA
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install cuDNN
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- name: Build
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
.jenkins/pytorch/win-build.sh
# Upload to GitHub so that people can click and download artifacts
- name: Upload artifacts to GitHub
if: always()
uses: actions/upload-artifact@v2
# Don't fail on upload to GH since it's only for user convenience
continue-on-error: true
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Upload artifacts to S3
if: always()
uses: seemethere/upload-artifact-s3@v3
with:
retention-days: 14
if-no-files-found: error
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Cleanup build-results and workspaces
if: always()
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf "${PYTORCH_FINAL_PACKAGE_DIR}"
rm -rf ./*
generate-test-matrix:
needs: [ciflow_should_run]
runs-on: ubuntu-18.04
env:
TEST_RUNNER_TYPE: windows.8xlarge.nvidia.gpu
NUM_TEST_SHARDS: 2
NUM_TEST_SHARDS_ON_PULL_REQUEST: 1
PR_BODY: ${{ github.event.pull_request.body }}
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
container:
image: python:3.9
steps:
- name: Install dependencies
run: pip install typing-extensions==3.10
- name: Clone pytorch/pytorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Generate test matrix
id: set-matrix
run: .github/scripts/generate_pytorch_test_matrix.py
test:
env:
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
TEST_CONFIG: ${{ matrix.config }}
http_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
https_proxy: "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128"
RUN_SMOKE_TESTS_ONLY_ON_PR: "true"
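# Unlike the CPU and CUDA 10.2 workflows above, pull requests here run a
# single shard (NUM_TEST_SHARDS_ON_PULL_REQUEST: 1) with smoke tests only;
# the full suite runs on push and scheduled builds.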
PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
needs: [build, generate-test-matrix, ciflow_should_run]
strategy:
matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
defaults:
run:
working-directory: pytorch-${{ github.run_id }}
steps:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
submodules: recursive
path: pytorch-${{ github.run_id }}
# deep clone, to allow use of git merge-base
fetch-depth: 0
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Install Visual Studio 2019 toolchain
shell: powershell
run: |
.\.circleci\scripts\vs_install.ps1
- name: Install CUDA
shell: bash
run: |
.circleci/scripts/windows_cuda_install.sh
- name: Install cuDNN
shell: bash
run: |
.circleci/scripts/windows_cudnn_install.sh
- uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
name: Download PyTorch Build Artifacts
with:
name: ${{ env.BUILD_ENVIRONMENT }}
path: C:\${{ github.run_id }}\build-results
- name: Check build-results folder
shell: powershell
run: |
tree /F C:\$Env:GITHUB_RUN_ID\build-results
# Needed for coverage in win-test.sh
- uses: actions/setup-python@v2
name: Setup Python3
with:
python-version: '3.x'
- name: Test
shell: bash
env:
PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
run: |
if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
export SHARD_NUMBER=0
fi
if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
export RUN_SMOKE_TESTS_ONLY=1
fi
.jenkins/pytorch/win-test.sh
- name: Zip test reports for upload
if: always()
env:
FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
shell: powershell
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml'
- uses: actions/upload-artifact@v2
name: Store Test Reports
if: always()
with:
name: test-reports-${{ matrix.config }}
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- uses: seemethere/upload-artifact-s3@v3
name: Store Test Reports on S3
if: always()
with:
retention-days: 14
if-no-files-found: error
path:
pytorch-${{ github.run_id }}/test-reports-*.zip
- name: Install render_test_results dependencies
if: always()
shell: bash
run: |
python3 -m pip install junitparser==2.1.1 rich==10.9.0
- name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
if: always()
shell: bash
# Console encoding is unpredictable on Windows; try to default to utf-8
env:
PYTHONIOENCODING: "utf-8"
run: |
python3 tools/render_junit.py test/
- name: Wait until all sessions have drained
shell: powershell
if: always()
timeout-minutes: 120
run: |
.github\scripts\wait_for_ssh_to_drain.ps1
- name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
shell: powershell
if: always()
run: |
.github\scripts\kill_active_ssh_sessions.ps1
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Display and upload test statistics (Click Me)
if: always()
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
AWS_DEFAULT_REGION: us-east-1
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
JOB_BASE_NAME: win-vs2019-cuda11.3-py3-test
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
shell: bash
run: |
python3 -m pip install -r requirements.txt
python3 -m pip install boto3==1.16.34
python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
- name: Cleanup workspace
if: always()
shell: bash
# Should remove the entirety of pytorch-${{ github.run_id }}
run: |
rm -rf ./*

Some files were not shown because too many files have changed in this diff.