[ROCm] upgrade nightly wheels to rocm6.4 (#151355)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/151355
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
This commit is contained in:
Jithun Nair
2025-04-17 17:29:07 +00:00
committed by PyTorch MergeBot
parent ef64beb232
commit b4550541ea
9 changed files with 653 additions and 655 deletions

View File

@ -25,9 +25,7 @@ python3 -m pip install meson ninja
########################### ###########################
### clone repo ### clone repo
########################### ###########################
# TEMPORARY FIX: https://gitlab.freedesktop.org/mesa/drm.git is down until 2025/03/22 GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
# GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
GIT_SSL_NO_VERIFY=true git clone git://anongit.freedesktop.org/mesa/drm
pushd drm pushd drm
########################### ###########################

View File

@ -30,7 +30,7 @@ CUDA_ARCHES_CUDNN_VERSION = {
} }
# NOTE: Also update the ROCm sources in tools/nightly.py when changing this list # NOTE: Also update the ROCm sources in tools/nightly.py when changing this list
ROCM_ARCHES = ["6.2.4", "6.3"] ROCM_ARCHES = ["6.3", "6.4"]
XPU_ARCHES = ["xpu"] XPU_ARCHES = ["xpu"]

View File

@ -87,7 +87,7 @@ jobs:
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral" runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
rocm_version: ["6.2.4", "6.3", "6.4"] rocm_version: ["6.3", "6.4"]
env: env:
GPU_ARCH_TYPE: rocm GPU_ARCH_TYPE: rocm
GPU_ARCH_VERSION: ${{ matrix.rocm_version }} GPU_ARCH_VERSION: ${{ matrix.rocm_version }}

View File

@ -34,7 +34,7 @@ jobs:
id-token: write id-token: write
strategy: strategy:
matrix: matrix:
rocm_version: ["64", "63", "624"] rocm_version: ["64", "63"]
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: actions/checkout@v4 uses: actions/checkout@v4

View File

@ -133,7 +133,7 @@ jobs:
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral" runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
rocm_version: ["6.2.4", "6.3", "6.4"] rocm_version: ["6.3", "6.4"]
env: env:
GPU_ARCH_TYPE: rocm-manylinux_2_28 GPU_ARCH_TYPE: rocm-manylinux_2_28
GPU_ARCH_VERSION: ${{ matrix.rocm_version }} GPU_ARCH_VERSION: ${{ matrix.rocm_version }}

View File

@ -54,7 +54,7 @@ jobs:
docker-image: ["pytorch/manylinux2_28-builder:cpu"] docker-image: ["pytorch/manylinux2_28-builder:cpu"]
include: include:
- device: "rocm" - device: "rocm"
rocm_version: "6.3" rocm_version: "6.4"
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
- device: "cuda" - device: "cuda"
rocm_version: "" rocm_version: ""

View File

@ -301,98 +301,6 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm6_2_4-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.2.4
GPU_ARCH_VERSION: 6.2.4
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: libtorch-rocm6_2_4-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-rocm6_2_4-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-rocm6_2_4-shared-with-deps-release-build
- get-label-type
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.2.4
GPU_ARCH_VERSION: 6.2.4
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-rocm6_2_4-shared-with-deps-release
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
libtorch-rocm6_2_4-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-rocm6_2_4-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.2.4
GPU_ARCH_VERSION: 6.2.4
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-rocm6_2_4-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm6_3-shared-with-deps-release-build: libtorch-rocm6_3-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml uses: ./.github/workflows/_binary-build-linux.yml
@ -484,3 +392,95 @@ jobs:
secrets: secrets:
github-token: ${{ secrets.GITHUB_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml uses: ./.github/workflows/_binary-upload.yml
libtorch-rocm6_4-shared-with-deps-release-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: 6.4
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.4-main
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: libtorch-rocm6_4-shared-with-deps-release
build_environment: linux-binary-libtorch
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
libtorch-rocm6_4-shared-with-deps-release-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- libtorch-rocm6_4-shared-with-deps-release-build
- get-label-type
runs-on: linux.rocm.gpu
timeout-minutes: 240
env:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: 6.4
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.4-main
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
steps:
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7
name: Download Build Artifacts
with:
name: libtorch-rocm6_4-shared-with-deps-release
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/libtorch-cxx11-builder:rocm6.4-main
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
uses: ./.github/actions/teardown-rocm
libtorch-rocm6_4-shared-with-deps-release-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
needs: libtorch-rocm6_4-shared-with-deps-release-test
with:
PYTORCH_ROOT: /pytorch
PACKAGE_TYPE: libtorch
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: rocm6.4
GPU_ARCH_VERSION: 6.4
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.4-main
LIBTORCH_CONFIG: release
LIBTORCH_VARIANT: shared-with-deps
build_name: libtorch-rocm6_4-shared-with-deps-release
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
uses: ./.github/workflows/_binary-upload.yml

File diff suppressed because it is too large Load Diff

View File

@ -141,9 +141,9 @@ PIP_SOURCES = {
supported_platforms={"Linux", "Windows"}, supported_platforms={"Linux", "Windows"},
accelerator="cuda", accelerator="cuda",
), ),
"rocm-6.2.4": PipSource( "rocm-6.4": PipSource(
name="rocm-6.2.4", name="rocm-6.4",
index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/rocm6.2.4", index_url=f"{PYTORCH_NIGHTLY_PIP_INDEX_URL}/rocm6.4",
supported_platforms={"Linux"}, supported_platforms={"Linux"},
accelerator="rocm", accelerator="rocm",
), ),