mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
This PR introduces *DeepCompile*, a new feature that efficiently integrates compiler optimizations with other DeepSpeed features. DeepCompile utilizes torch's dynamo to capture the computation graph and modifies it to incorporate DeepSpeed’s optimizations seamlessly. Currently, DeepCompile supports ZeRO-1 and ZeRO-3, with enhancements such as proactive prefetching and selective unsharding to improve performance. (More details will be added later.)

---------

Signed-off-by: Masahiro Tanaka <mtanaka@microsoft.com>
Signed-off-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: zafarsadiq <zafarsadiq120@gmail.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
60 lines
1.7 KiB
YAML
60 lines
1.7 KiB
YAML
# CI workflow: on a self-hosted runner labelled nvidia/cu124/v100, install
# PyTorch from the cu124 wheel index, a pinned transformers checkout and
# DeepSpeed (with extras), then run the unit tests under tests/unit/.
name: nv-torch-latest-v100

on:
  # Manual runs from the Actions UI / API.
  workflow_dispatch:
  # Pull requests, except those that only touch the paths listed below.
  pull_request:
    paths-ignore:
      - 'docs/**'
      - 'blogs/**'
      - 'deepspeed/inference/v2/**'
      - 'tests/unit/inference/v2/**'
  # Merge-queue checks targeting master.
  merge_group:
    branches: [master]
  # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 0 * * *"

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: [self-hosted, nvidia, cu124, v100]

    steps:
      - uses: actions/checkout@v4

      # Repository-local action; presumably prepares the Python virtualenv
      # used by the following steps -- confirm against its action.yml.
      - id: setup-venv
        uses: ./.github/workflows/setup-venv

      # NOTE(review): TORCH_CACHE is not defined in this file; presumably it is
      # provided by the runner environment or by setup-venv -- confirm.
      - name: Install pytorch
        run: |
          pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Installs transformers from source at a pinned commit.
      - name: Install transformers
        run: |
          git clone https://github.com/huggingface/transformers
          cd transformers
          # if needed switch to the last known good SHA until transformers@master is fixed
          git checkout 981c276
          git rev-parse --short HEAD
          pip install .

      # Installs DeepSpeed from the checked-out repository with the listed
      # extras, then prints the ds_report summary.
      - name: Install deepspeed
        run: |
          pip install .[dev,1bit,autotuning,deepcompile]
          ds_report

      # Records the resolved package set in the job log.
      - name: Python environment
        run: |
          pip list

      # Two pytest passes over unit/: one with 8 xdist workers, then one
      # selecting only tests marked 'sequential' (no -n).
      # NOTE(review): torch is installed unpinned above while --torch_ver is
      # fixed at "2.6"; presumably this must be bumped together with new torch
      # releases -- confirm how the test suite uses --torch_ver.
      # NOTE(review): PYTEST_OPTS is not defined in this file -- confirm source.
      - name: Unit tests
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests
          pytest $PYTEST_OPTS --forked -n 8 unit/ --torch_ver="2.6" --cuda_ver="12.4"
          pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.6" --cuda_ver="12.4"