Update workflows to cuda 12.4 (#7000)

- Update existing workflows that use cu121 to cu124. Note, this means
that where we download torch latest, we will now be getting torch 2.6
rather than the torch latest 2.5 provided with cuda 12.1.
- Note, nv-nightly is failing in master currently due to unrelated
errors, so this could be ignored in this PR (nv-nightly tested locally,
where it passes with 12.1 and it also passes with 12.4).

---------

Signed-off-by: Fabien Dupont <fdupont@redhat.com>
Signed-off-by: Logan Adams <loadams@microsoft.com>
Signed-off-by: Olatunji Ruwase <olruwase@microsoft.com>
Signed-off-by: inkcherry <mingzhi.liu@intel.com>
Signed-off-by: Omar Elayan <oelayan@habana.ai>
Co-authored-by: Fabien Dupont <fabiendupont@fabiendupont.fr>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Liangliang Ma <1906710196@qq.com>
Co-authored-by: inkcherry <mingzhi.liu@intel.com>
Co-authored-by: Omar Elayan <142979319+oelayan7@users.noreply.github.com>
This commit is contained in:
Logan Adams
2025-02-12 15:25:41 -08:00
committed by GitHub
parent 549e11d2cf
commit 079de6bdff
9 changed files with 24 additions and 24 deletions

View File

@ -19,7 +19,7 @@ concurrency:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -29,7 +29,7 @@ jobs:
- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

View File

@ -27,7 +27,7 @@ permissions:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -37,7 +37,7 @@ jobs:
- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

View File

@ -22,7 +22,7 @@ concurrency:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -32,7 +32,7 @@ jobs:
- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@ -58,8 +58,8 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
#pytest $PYTEST_OPTS -m 'seq_inference' unit/ --torch_ver="2.1" --cuda_ver="12.1"
pytest $PYTEST_OPTS -m 'inference_ops' unit/ --torch_ver="2.1" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -n 4 -m 'inference' unit/ --torch_ver="2.1" --cuda_ver="12.1"
#pytest $PYTEST_OPTS -m 'seq_inference' unit/ --torch_ver="2.1" --cuda_ver="12.4"
pytest $PYTEST_OPTS -m 'inference_ops' unit/ --torch_ver="2.1" --cuda_ver="12.4"
pytest $PYTEST_OPTS --forked -n 4 -m 'inference' unit/ --torch_ver="2.1" --cuda_ver="12.4"
# run ds_report again to check updated op list
ds_report

View File

@ -19,7 +19,7 @@ concurrency:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -29,7 +29,7 @@ jobs:
- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

View File

@ -27,7 +27,7 @@ concurrency:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -37,7 +37,7 @@ jobs:
- name: Install pytorch
run: |
pip3 install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip3 install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

View File

@ -18,7 +18,7 @@ permissions:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -28,7 +28,7 @@ jobs:
- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@ -58,7 +58,7 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="2.5" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="2.6" --cuda_ver="12.4"
- name: Open GitHub issue if nightly CI fails
if: ${{ failure() && (github.event_name == 'schedule') }}

View File

@ -19,7 +19,7 @@ concurrency:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -29,7 +29,7 @@ jobs:
- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@ -55,5 +55,5 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="2.5" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.5" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="2.6" --cuda_ver="12.4"
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.6" --cuda_ver="12.4"

View File

@ -18,7 +18,7 @@ permissions:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -28,7 +28,7 @@ jobs:
- name: Install pytorch
run: |
pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

View File

@ -18,7 +18,7 @@ concurrency:
jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]
steps:
- uses: actions/checkout@v4
@ -29,7 +29,7 @@ jobs:
- name: Install pytorch
run: |
# use the same pytorch version as transformers CI
pip install -U --cache-dir $TORCH_CACHE torch==2.0.1+cu121 --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch==2.0.1+cu124 --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"