mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
Unpin transformers version for all workflows except `nv-torch-latest-v100` as this still has a tolerance issue with some quantization tests. Signed-off-by: Logan Adams <loadams@microsoft.com>
68 lines
1.9 KiB
YAML
68 lines
1.9 KiB
YAML
# Runs the unit-test job below against a PyTorch nightly build on V100 runners.
name: nv-torch-nightly-v100

on:
  # Manual trigger from the Actions tab or the API.
  workflow_dispatch:
  # Scheduled trigger: every day at 00:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '0 0 * * *'
  # Pull requests only trigger this workflow when they touch this file itself.
  pull_request:
    paths:
      - '.github/workflows/nv-torch-nightly-v100.yml'
|
|
|
|
# Runs are grouped by workflow name + git ref; starting a new run in a group
# cancels the run of that group that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
|
|
|
|
# Scopes granted to the GITHUB_TOKEN used by this workflow.
permissions:
  # Read-only access to repository contents.
  contents: read
  # Write access to issues; the failure-reporting step in this workflow is
  # handed GITHUB_TOKEN to create or update an issue.
  issues: write
|
|
|
|
jobs:
  # Single job: build the environment from nightly/upstream sources, then run
  # the unit-test suite.
  unit-tests:
    # Self-hosted runner selected by labels (NVIDIA, CUDA 12.4, V100).
    runs-on: [self-hosted, nvidia, cu124, v100]

    steps:
      - uses: actions/checkout@v4

      # Repository-local composite action.
      # NOTE(review): presumably creates the virtualenv and exports PYTEST_OPTS
      # used by the "Unit tests" step -- confirm in .github/workflows/setup-venv.
      - id: setup-venv
        uses: ./.github/workflows/setup-venv

      # --pre plus the nightly index pulls pre-release cu124 wheels; the two
      # python one-liners log the installed version and CUDA visibility.
      - name: Install pytorch
        run: |
          pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Installs transformers from a fresh clone of its default branch (unpinned)
      # and logs the short SHA that was installed.
      - name: Install transformers
        run: |
          git clone https://github.com/huggingface/transformers
          cd transformers
          # if needed switch to the last known good SHA until transformers@master is fixed
          # git checkout 981c276
          git rev-parse --short HEAD
          pip install .

      # Each `run` step starts in the workspace root again, so `.` here is the
      # checked-out repository, not the transformers clone.
      - name: Install deepspeed
        run: |
          # Quote the requirement: unquoted, the brackets form a shell glob
          # character class and could be expanded against matching dotfiles.
          pip install '.[dev,1bit,autotuning]'
          ds_report

      # Log the resolved package set for debugging nightly breakages.
      - name: Python environment
        run: |
          pip list

      # First pass runs the suite with 8 parallel workers; second pass runs only
      # tests marked 'sequential', without -n.
      - name: Unit tests
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests
          pytest $PYTEST_OPTS --forked -n 8 unit/
          pytest $PYTEST_OPTS --forked -m 'sequential' unit/

      # Only for scheduled runs that failed: create an issue from the template,
      # or update the existing one (update_existing: true).
      - name: Open GitHub issue if nightly CI fails
        if: ${{ failure() && (github.event_name == 'schedule') }}
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
          update_existing: true
|