diff --git a/.github/scripts/generate_ci_workflows.py b/.github/scripts/generate_ci_workflows.py index 55cb02504ea4..4df6150f9765 100755 --- a/.github/scripts/generate_ci_workflows.py +++ b/.github/scripts/generate_ci_workflows.py @@ -22,6 +22,7 @@ LABEL_CIFLOW_BINARIES = "ciflow/binaries" LABEL_CIFLOW_PERIODIC = "ciflow/periodic" LABEL_CIFLOW_BINARIES_LIBTORCH = "ciflow/binaries_libtorch" LABEL_CIFLOW_BINARIES_WHEEL = "ciflow/binaries_wheel" +LABEL_CIFLOW_ROCM = "ciflow/rocm" @dataclass @@ -146,13 +147,35 @@ LINUX_BINARY_BUILD_WORFKLOWS = [ ), ] +ROCM_SMOKE_WORKFLOWS = [ + BinaryBuildWorkflow( + os=OperatingSystem.LINUX, + package_type="manywheel", + build_variant="rocm", + build_configs=generate_binary_build_matrix.generate_wheels_matrix( + OperatingSystem.LINUX, + arches=["6.4"], + python_versions=["3.9"], + ), + ciflow_config=CIFlowConfig( + labels={ + LABEL_CIFLOW_BINARIES, + LABEL_CIFLOW_BINARIES_WHEEL, + LABEL_CIFLOW_ROCM, + }, + isolated_workflow=True, + ), + branches="main", + ), +] + LINUX_BINARY_SMOKE_WORKFLOWS = [ BinaryBuildWorkflow( os=OperatingSystem.LINUX, package_type="manywheel", build_configs=generate_binary_build_matrix.generate_wheels_matrix( OperatingSystem.LINUX, - arches=["12.6", "12.8", "12.9", "6.4"], + arches=["12.6", "12.8", "12.9"], python_versions=["3.9"], ), branches="main", @@ -387,6 +410,11 @@ def main() -> None: jinja_env.get_template("linux_binary_build_workflow.yml.j2"), S390X_BINARY_BUILD_WORKFLOWS, ), + ( + # Give rocm it's own workflow file + jinja_env.get_template("linux_binary_build_workflow.yml.j2"), + ROCM_SMOKE_WORKFLOWS, + ), ( jinja_env.get_template("linux_binary_build_workflow.yml.j2"), LINUX_BINARY_SMOKE_WORKFLOWS, diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml index 8e27aca1150b..d1e89bb6e2d8 100644 --- a/.github/workflows/generated-linux-binary-manywheel-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-main.yml @@ -182,95 +182,3 @@ jobs: runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 and 12.9 build need sm_70+ runner secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - - manywheel-py3_9-rocm6_4-build: - if: ${{ github.repository_owner == 'pytorch' }} - uses: ./.github/workflows/_binary-build-linux.yml - needs: get-label-type - with: - PYTORCH_ROOT: /pytorch - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm6.4 - GPU_ARCH_VERSION: 6.4 - GPU_ARCH_TYPE: rocm - DOCKER_IMAGE: manylinux2_28-builder - DOCKER_IMAGE_TAG_PREFIX: rocm6.4 - use_split_build: False - DESIRED_PYTHON: "3.9" - runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build_name: manywheel-py3_9-rocm6_4 - build_environment: linux-binary-manywheel - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_9-rocm6_4-test: # Testing - if: ${{ github.repository_owner == 'pytorch' }} - needs: - - manywheel-py3_9-rocm6_4-build - - get-label-type - runs-on: linux.rocm.gpu.mi250 - timeout-minutes: 240 - env: - PYTORCH_ROOT: /pytorch - PACKAGE_TYPE: manywheel - # TODO: This is a legacy variable that we eventually want to get rid of in - # favor of GPU_ARCH_VERSION - DESIRED_CUDA: rocm6.4 - GPU_ARCH_VERSION: 6.4 - GPU_ARCH_TYPE: rocm - SKIP_ALL_TESTS: 1 - DOCKER_IMAGE: manylinux2_28-builder - DOCKER_IMAGE_TAG_PREFIX: rocm6.4 - use_split_build: False - DESIRED_PYTHON: "3.9" - steps: - - name: Setup ROCm - uses: ./.github/actions/setup-rocm - - uses: actions/download-artifact@v4.1.7 - name: Download Build Artifacts - with: - name: manywheel-py3_9-rocm6_4 - path: "${{ runner.temp }}/artifacts/" - - name: Checkout PyTorch - uses: actions/checkout@v4 - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - submodules: recursive - path: pytorch - show-progress: false - - name: Clean PyTorch checkout - run: | - # Remove any artifacts from the previous checkouts - git clean -fxd - working-directory: pytorch - - name: ROCm set GPU_FLAG - run: | - echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" - - name: configure aws credentials - id: aws_creds - if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only - aws-region: us-east-1 - role-duration-seconds: 18000 - - name: Calculate docker image - id: calculate-docker-image - uses: pytorch/test-infra/.github/actions/calculate-docker-image@main - with: - docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} - docker-image-name: manylinux2_28-builder - custom-tag-prefix: rocm6.4 - docker-build-dir: .ci/docker - working-directory: pytorch - - name: Pull Docker image - uses: pytorch/test-infra/.github/actions/pull-docker-image@main - with: - docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - - name: Test Pytorch binary - uses: ./pytorch/.github/actions/test-pytorch-binary - env: - DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} - - name: Teardown ROCm - uses: ./.github/actions/teardown-rocm diff --git a/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml b/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml new file mode 100644 index 000000000000..b6b63c4e38d5 --- /dev/null +++ b/.github/workflows/generated-linux-binary-manywheel-rocm-main.yml @@ -0,0 +1,137 @@ +# @generated DO NOT EDIT MANUALLY + +# Template is at: .github/templates/linux_binary_build_workflow.yml.j2 +# Generation script: .github/scripts/generate_ci_workflows.py +name: linux-binary-manywheel-rocm + + +on: + push: + branches: + - main + tags: + - 'ciflow/binaries/*' + - 'ciflow/binaries_wheel/*' + - 'ciflow/rocm/*' + workflow_dispatch: + +permissions: + id-token: write + +env: + # Needed for conda builds + ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" + AWS_DEFAULT_REGION: us-east-1 + BINARY_ENV_FILE: /tmp/env + BUILD_ENVIRONMENT: linux-binary-manywheel-rocm + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + PYTORCH_FINAL_PACKAGE_DIR: /artifacts + PYTORCH_ROOT: /pytorch + SHA1: ${{ github.event.pull_request.head.sha || github.sha }} + SKIP_ALL_TESTS: 0 +concurrency: + group: linux-binary-manywheel-rocm-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + get-label-type: + if: github.repository_owner == 'pytorch' + name: get-label-type + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + manywheel-py3_9-rocm6_4-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm6.4 + GPU_ARCH_VERSION: 6.4 + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: rocm6.4 + use_split_build: False + DESIRED_PYTHON: "3.9" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_9-rocm6_4 + build_environment: linux-binary-manywheel-rocm + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_9-rocm6_4-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_9-rocm6_4-build + - get-label-type + runs-on: linux.rocm.gpu.mi250 + timeout-minutes: 240 + env: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm6.4 + GPU_ARCH_VERSION: 6.4 + GPU_ARCH_TYPE: rocm + SKIP_ALL_TESTS: 1 + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: rocm6.4 + use_split_build: False + DESIRED_PYTHON: "3.9" + steps: + - name: Setup ROCm + uses: ./.github/actions/setup-rocm + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_9-rocm6_4 + path: "${{ runner.temp }}/artifacts/" + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" + - name: configure aws credentials + id: aws_creds + if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only + aws-region: us-east-1 + role-duration-seconds: 18000 + - name: Calculate docker image + id: calculate-docker-image + uses: pytorch/test-infra/.github/actions/calculate-docker-image@main + with: + docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} + docker-image-name: manylinux2_28-builder + custom-tag-prefix: rocm6.4 + docker-build-dir: .ci/docker + working-directory: pytorch + - name: Pull Docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@main + with: + docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} + - name: Test Pytorch binary + uses: ./pytorch/.github/actions/test-pytorch-binary + env: + DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} + - name: Teardown ROCm + uses: ./.github/actions/teardown-rocm