mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Enable manywheel build and smoke test on main branch for ROCm (#153287)
Fixes the issue of ROCm wheel build breakage not being discovered until the nightly job runs, e.g. https://github.com/pytorch/pytorch/pull/153253
Pull Request resolved: https://github.com/pytorch/pytorch/pull/153287
Approved by: https://github.com/jeffdaily
This commit is contained in:
committed by
PyTorch MergeBot
parent
5285d10243
commit
794ef6c9b8
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
@ -49,6 +49,7 @@ self-hosted-runner:
|
||||
# Organization-wide AMD-hosted runners
|
||||
# MI2xx runners
|
||||
- linux.rocm.gpu
|
||||
- linux.rocm.gpu.mi250
|
||||
- linux.rocm.gpu.2
|
||||
- linux.rocm.gpu.4
|
||||
# MI300 runners
|
||||
|
2
.github/scripts/generate_ci_workflows.py
vendored
2
.github/scripts/generate_ci_workflows.py
vendored
@ -152,7 +152,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||
package_type="manywheel",
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
arches=["12.6", "12.8", "12.9"],
|
||||
arches=["12.6", "12.8", "12.9", "6.4"],
|
||||
python_versions=["3.9"],
|
||||
),
|
||||
branches="main",
|
||||
|
@ -171,7 +171,7 @@ jobs:
|
||||
- name: Teardown XPU
|
||||
uses: ./.github/actions/teardown-xpu
|
||||
{%- else %}
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: !{{ common.timeout_minutes }}
|
||||
!{{ upload.binary_env(config) }}
|
||||
steps:
|
||||
|
4
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
4
.github/workflows/generated-linux-binary-libtorch-nightly.yml
generated
vendored
@ -274,7 +274,7 @@ jobs:
|
||||
needs:
|
||||
- libtorch-rocm6_3-shared-with-deps-release-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -388,7 +388,7 @@ jobs:
|
||||
needs:
|
||||
- libtorch-rocm6_4-shared-with-deps-release-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
|
92
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
92
.github/workflows/generated-linux-binary-manywheel-main.yml
generated
vendored
@ -182,3 +182,95 @@ jobs:
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
manywheel-py3_9-rocm6_4-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
uses: ./.github/workflows/_binary-build-linux.yml
|
||||
needs: get-label-type
|
||||
with:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: rocm6.4
|
||||
GPU_ARCH_VERSION: 6.4
|
||||
GPU_ARCH_TYPE: rocm
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
build_name: manywheel-py3_9-rocm6_4
|
||||
build_environment: linux-binary-manywheel
|
||||
secrets:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
manywheel-py3_9-rocm6_4-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- manywheel-py3_9-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
PACKAGE_TYPE: manywheel
|
||||
# TODO: This is a legacy variable that we eventually want to get rid of in
|
||||
# favor of GPU_ARCH_VERSION
|
||||
DESIRED_CUDA: rocm6.4
|
||||
GPU_ARCH_VERSION: 6.4
|
||||
GPU_ARCH_TYPE: rocm
|
||||
SKIP_ALL_TESTS: 1
|
||||
DOCKER_IMAGE: manylinux2_28-builder
|
||||
DOCKER_IMAGE_TAG_PREFIX: rocm6.4
|
||||
use_split_build: False
|
||||
DESIRED_PYTHON: "3.9"
|
||||
steps:
|
||||
- name: Setup ROCm
|
||||
uses: ./.github/actions/setup-rocm
|
||||
- uses: actions/download-artifact@v4.1.7
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: manywheel-py3_9-rocm6_4
|
||||
path: "${{ runner.temp }}/artifacts/"
|
||||
- name: Checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
submodules: recursive
|
||||
path: pytorch
|
||||
show-progress: false
|
||||
- name: Clean PyTorch checkout
|
||||
run: |
|
||||
# Remove any artifacts from the previous checkouts
|
||||
git clean -fxd
|
||||
working-directory: pytorch
|
||||
- name: ROCm set GPU_FLAG
|
||||
run: |
|
||||
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
|
||||
- name: configure aws credentials
|
||||
id: aws_creds
|
||||
if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }}
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
|
||||
aws-region: us-east-1
|
||||
role-duration-seconds: 18000
|
||||
- name: Calculate docker image
|
||||
id: calculate-docker-image
|
||||
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
|
||||
with:
|
||||
docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }}
|
||||
docker-image-name: manylinux2_28-builder
|
||||
custom-tag-prefix: rocm6.4
|
||||
docker-build-dir: .ci/docker
|
||||
working-directory: pytorch
|
||||
- name: Pull Docker image
|
||||
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
|
||||
with:
|
||||
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||
- name: Test Pytorch binary
|
||||
uses: ./pytorch/.github/actions/test-pytorch-binary
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
|
||||
- name: Teardown ROCm
|
||||
uses: ./.github/actions/teardown-rocm
|
||||
|
24
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
24
.github/workflows/generated-linux-binary-manywheel-nightly.yml
generated
vendored
@ -345,7 +345,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_9-rocm6_3-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -459,7 +459,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_9-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -958,7 +958,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_10-rocm6_3-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -1072,7 +1072,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_10-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -1639,7 +1639,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_11-rocm6_3-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -1753,7 +1753,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_11-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -2252,7 +2252,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_12-rocm6_3-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -2366,7 +2366,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_12-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -2865,7 +2865,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_13-rocm6_3-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -2979,7 +2979,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_13-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -3478,7 +3478,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_13t-rocm6_3-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
@ -3592,7 +3592,7 @@ jobs:
|
||||
needs:
|
||||
- manywheel-py3_13t-rocm6_4-build
|
||||
- get-label-type
|
||||
runs-on: linux.rocm.gpu
|
||||
runs-on: linux.rocm.gpu.mi250
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
PYTORCH_ROOT: /pytorch
|
||||
|
Reference in New Issue
Block a user