From 6590f4fb0ef80910f892c8886a62f9e3933706ca Mon Sep 17 00:00:00 2001 From: chuanqiw Date: Tue, 20 Aug 2024 15:05:20 +0000 Subject: [PATCH] [CD] Enable python 3.13 for xpu nightly build (#133670) Enable Python 3.13 for the XPU nightly build; this depends on https://github.com/pytorch/pytorch/pull/133454 landing first. Also update the XPU nightly wheel test environment. Works towards https://github.com/pytorch/pytorch/issues/114850. Fixes #130543 Pull Request resolved: https://github.com/pytorch/pytorch/pull/133670 Approved by: https://github.com/atalman, https://github.com/malfet --- .circleci/scripts/binary_linux_test.sh | 4 +- .circleci/scripts/binary_populate_env.sh | 4 +- .../scripts/generate_binary_build_matrix.py | 5 +- ...nerated-linux-binary-manywheel-nightly.yml | 110 ++++++++++++++++++ 4 files changed, 116 insertions(+), 7 deletions(-) diff --git a/.circleci/scripts/binary_linux_test.sh b/.circleci/scripts/binary_linux_test.sh index 73c38159eefd..5d92c9099bff 100755 --- a/.circleci/scripts/binary_linux_test.sh +++ b/.circleci/scripts/binary_linux_test.sh @@ -117,9 +117,9 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then fi if [[ "$GPU_ARCH_TYPE" == xpu ]]; then - # Workaround for __mkl_tmp_MOD unbound variable issue, refer https://github.com/pytorch/pytorch/issues/130543 - set +u + # Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh + source /opt/intel/oneapi/pti/latest/env/vars.sh fi # Test the package diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh index 33bf899fd908..e918635922a4 100755 --- a/.circleci/scripts/binary_populate_env.sh +++ b/.circleci/scripts/binary_populate_env.sh @@ -102,10 +102,10 @@ fi # Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then
- TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}" + TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt) - TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+${TRITON_SHORTHASH}" + TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" fi if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index 62b9d1ca8364..002b43195bb4 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -365,10 +365,9 @@ def generate_wheels_matrix( else arch_version ) - # TODO: Enable python 3.13 on rocm, xpu, aarch64, windows + # TODO: Enable python 3.13 on rocm, aarch64, windows if ( - gpu_arch_type in ["rocm", "xpu"] - or (os != "linux" and os != "linux-s390x") + gpu_arch_type == "rocm" or (os != "linux" and os != "linux-s390x") ) and python_version == "3.13": continue diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index bd90c544a1bc..036679ce1842 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -3995,3 +3995,113 @@ jobs: conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-xpu-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: 
manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main + DESIRED_PYTHON: "3.13" + runner_prefix: amz2023. + build_name: manywheel-py3_13-xpu + build_environment: linux-binary-manywheel + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-xpu-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: manywheel-py3_13-xpu-build + runs-on: linux.idc.xpu + timeout-minutes: 240 + env: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + SKIP_ALL_TESTS: 1 + DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main + DESIRED_PYTHON: "3.13" + permissions: + id-token: write + contents: read + steps: + - name: Setup XPU + uses: ./.github/actions/setup-xpu + - name: configure aws credentials + id: aws_creds + uses: aws-actions/configure-aws-credentials@v1.7.0 + with: + role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only + aws-region: us-east-1 + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + - uses: actions/download-artifact@v3 + name: Download Build Artifacts + with: + name: manywheel-py3_13-xpu + path: "${{ runner.temp }}/artifacts/" + - name: Checkout PyTorch + uses: malfet/checkout@silent-checkout + with: + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + submodules: recursive + path: pytorch + quiet-checkout: true + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Checkout pytorch/builder + uses: malfet/checkout@silent-checkout + with: + ref: main + submodules: recursive + repository: 
pytorch/builder + path: builder + quiet-checkout: true + - name: Clean pytorch/builder checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: builder + - name: Pull Docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@main + with: + docker-image: pytorch/manylinux2_28-builder:xpu-main + - name: Test Pytorch binary + uses: ./pytorch/.github/actions/test-pytorch-binary + - name: Teardown XPU + uses: ./.github/actions/teardown-xpu + manywheel-py3_13-xpu-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13-xpu-test + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + DOCKER_IMAGE: pytorch/manylinux2_28-builder:xpu-main + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-xpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} + uses: ./.github/workflows/_binary-upload.yml