mirror of
https://github.com/huggingface/transformers.git
synced 2025-10-21 01:23:56 +08:00
Compare commits
44 Commits
v4.52.1
...
younes-tes
Author | SHA1 | Date | |
---|---|---|---|
cd9523ca90 | |||
a79e6848e0 | |||
6a5042c42a | |||
c57b198910 | |||
355291d0f4 | |||
c47c2fc90a | |||
67fd8ba561 | |||
f3d8bcf5e8 | |||
250528b058 | |||
0e5b1db636 | |||
a7bec5efe7 | |||
1e59183af4 | |||
bb41c9b212 | |||
28a663eb89 | |||
40efe01020 | |||
2ac50a1898 | |||
6e993f4390 | |||
f16597da92 | |||
e0f11e9fb2 | |||
a00d4cd8fb | |||
953e016288 | |||
d7e9dbb078 | |||
a2557f0884 | |||
c93750fb0e | |||
a6d7a0222b | |||
82eb9204c7 | |||
1603a7618b | |||
8303b58544 | |||
5564dac04f | |||
031747b677 | |||
60bd8c1e0b | |||
dfbf156a40 | |||
d1cc176e22 | |||
3cfc9f6cbc | |||
bd8c61a123 | |||
13c798ba4e | |||
7e91063faa | |||
3260b0a623 | |||
521f265abc | |||
a783353e31 | |||
1f96df0efc | |||
8ba0dda329 | |||
80335b3739 | |||
e4c324bd95 |
317
.github/workflows/self-scheduled.yml
vendored
317
.github/workflows/self-scheduled.yml
vendored
@ -27,51 +27,37 @@ env:
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
RUN_PT_TF_CROSS_TESTS: 1
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
NUM_SLICES: 2
|
||||
NUM_SLICES: 128
|
||||
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-models
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
|
||||
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Set up Python 3.9
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cleanup
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
rm -rf tests/__pycache__
|
||||
rm -rf tests/models/__pycache__
|
||||
rm -rf reports
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- id: set-matrix
|
||||
name: Identify models to test
|
||||
working-directory: /transformers/tests
|
||||
run: |
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "folder_slices=$(python3 ./utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
run_tests_gpu:
|
||||
name: " "
|
||||
run_model_tests_gpu:
|
||||
name: "Run model tests"
|
||||
needs: setup
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@ -85,280 +71,13 @@ jobs:
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
secrets: inherit
|
||||
|
||||
run_examples_gpu:
|
||||
name: Examples directory
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run examples tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
pip install -r examples/pytorch/_tests_requirements.txt
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_examples_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
|
||||
|
||||
run_pipelines_torch_gpu:
|
||||
name: PyTorch pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all pipeline tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
|
||||
|
||||
run_pipelines_tf_gpu:
|
||||
name: TensorFlow pipelines
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-tensorflow-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all pipeline tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ always() }}
|
||||
run: |
|
||||
cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
|
||||
|
||||
run_all_tests_torch_cuda_extensions_gpu:
|
||||
name: Torch CUDA extension tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
needs: setup
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /workspace/transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /workspace/transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Remove cached torch extensions
|
||||
run: rm -rf /github/home/.cache/torch_extensions/
|
||||
|
||||
# To avoid unknown test failures
|
||||
- name: Pre build DeepSpeed *again*
|
||||
working-directory: /workspace
|
||||
run: |
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /workspace/transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all tests on GPU
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
||||
run_tests_quantization_torch_gpu:
|
||||
name: Quantization tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-quantization-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
needs: setup
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run quantization tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
|
||||
|
||||
run_extract_warnings:
|
||||
name: Extract warnings in CI artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [
|
||||
setup,
|
||||
run_tests_gpu,
|
||||
run_examples_gpu,
|
||||
run_pipelines_tf_gpu,
|
||||
run_pipelines_torch_gpu,
|
||||
run_all_tests_torch_cuda_extensions_gpu,
|
||||
run_tests_quantization_torch_gpu,
|
||||
run_model_tests_gpu,
|
||||
]
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
@ -401,12 +120,7 @@ jobs:
|
||||
if: always()
|
||||
needs: [
|
||||
setup,
|
||||
run_tests_gpu,
|
||||
run_examples_gpu,
|
||||
run_pipelines_tf_gpu,
|
||||
run_pipelines_torch_gpu,
|
||||
run_all_tests_torch_cuda_extensions_gpu,
|
||||
run_tests_quantization_torch_gpu,
|
||||
run_model_tests_gpu,
|
||||
run_extract_warnings
|
||||
]
|
||||
steps:
|
||||
@ -422,11 +136,10 @@ jobs:
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ env.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: scheduled
|
||||
CI_EVENT: scheduled-models
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
SETUP_STATUS: ${{ needs.setup.result }}
|
||||
|
170
.github/workflows/slow-examples-test.yml
vendored
Normal file
170
.github/workflows/slow-examples-test.yml
vendored
Normal file
@ -0,0 +1,170 @@
|
||||
name: Self-hosted runner - SLOW Example tests (scheduled)
|
||||
|
||||
# Note that each job's dependencies go into a corresponding docker file.
|
||||
#
|
||||
# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
|
||||
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
|
||||
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "21 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
- younes-test-workflow
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
RUN_SLOW: yes
|
||||
# For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
|
||||
# This token is created under the bot `hf-transformers-bot`.
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
RUN_PT_TF_CROSS_TESTS: 1
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
NUM_SLICES: 2
|
||||
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-examples
|
||||
|
||||
jobs:
|
||||
run_examples_gpu:
|
||||
name: Examples directory
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run examples tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
pip install -r examples/pytorch/_tests_requirements.txt
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_examples_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
|
||||
|
||||
run_extract_warnings:
|
||||
name: Extract warnings in CI artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [
|
||||
run_examples_gpu,
|
||||
]
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Install transformers
|
||||
run: pip install transformers
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Create output directory
|
||||
run: mkdir warnings_in_ci
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: warnings_in_ci
|
||||
|
||||
- name: Show artifacts
|
||||
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
|
||||
working-directory: warnings_in_ci
|
||||
|
||||
- name: Extract warnings in CI artifacts
|
||||
run: |
|
||||
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
|
||||
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
|
||||
|
||||
- name: Upload artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: warnings_in_ci
|
||||
path: warnings_in_ci/selected_warnings.json
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [
|
||||
run_examples_gpu,
|
||||
run_extract_warnings
|
||||
]
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
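# NOTE(review): this workflow defines no `setup` job and `setup` is not listed in `needs`, so `needs.setup.result` below presumably expands to an empty string — confirm.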
|
||||
run: |
|
||||
echo "Setup status: ${{ needs.setup.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ env.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: scheduled-examples
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
SETUP_STATUS: success # No need for setup status here
|
||||
CI_SKIP_ERRORED_OUT: true
|
||||
# `notification_service.py` is invoked without arguments below (this workflow has no `setup` job whose matrix could be passed).
|
||||
# Artifact names use `_` instead of `/`, so a name such as `models/bert` has to be mapped to `models_bert` in `notification_service.py`.
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: prev_ci_results
|
||||
path: prev_ci_results
|
165
.github/workflows/slow-generation-tests.yml
vendored
Normal file
165
.github/workflows/slow-generation-tests.yml
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
name: Self-hosted runner - SLOW Generation tests (scheduled)
|
||||
|
||||
# Note that each job's dependencies go into a corresponding docker file.
|
||||
#
|
||||
# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
|
||||
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
|
||||
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "21 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
- younes-test-workflow
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
RUN_SLOW: yes
|
||||
# For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
|
||||
# This token is created under the bot `hf-transformers-bot`.
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
RUN_PT_TF_CROSS_TESTS: 1
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
NUM_SLICES: 2
|
||||
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-generation
|
||||
|
||||
jobs:
|
||||
run_generation_tests:
|
||||
name: PyTorch Generation tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
docker_image: ["huggingface/transformers-all-latest-gpu"] # TODO: add also PT nightly here
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
|
||||
container:
|
||||
image: ${{ matrix.docker_image }}
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
env:
|
||||
MACHINE_TYPE: ${{ matrix.machine_type }}
|
||||
RUN_SLOW: 1
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all Generation tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
make run_generation_tests
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_generation_gpu/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_generation_gpu"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_generation_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_generation_gpu
|
||||
|
||||
run_extract_warnings:
|
||||
name: Extract warnings in CI artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: run_generation_tests
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Install transformers
|
||||
run: pip install transformers
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Create output directory
|
||||
run: mkdir warnings_in_ci
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: warnings_in_ci
|
||||
|
||||
- name: Show artifacts
|
||||
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
|
||||
working-directory: warnings_in_ci
|
||||
|
||||
- name: Extract warnings in CI artifacts
|
||||
run: |
|
||||
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
|
||||
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
|
||||
|
||||
- name: Upload artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: warnings_in_ci
|
||||
path: warnings_in_ci/selected_warnings.json
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: run_generation_tests
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Test status: ${{ needs.run_generation_tests.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ env.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: scheduled-generation
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
SETUP_STATUS: success # No need for setup status here
|
||||
CI_SKIP_ERRORED_OUT: true
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: prev_ci_results
|
||||
path: prev_ci_results
|
113
.github/workflows/slow-integration-tests.yml
vendored
Normal file
113
.github/workflows/slow-integration-tests.yml
vendored
Normal file
@ -0,0 +1,113 @@
|
||||
name: Self-hosted runner - SLOW integration tests (DeepSpeed, PEFT, FA2, etc.) (scheduled)
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "20 5 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
- younes-test-workflow
|
||||
|
||||
jobs:
|
||||
run_integration_tests:
|
||||
name: Run all integration tests
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci] # TODO: move to A10
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
|
||||
env:
|
||||
MACHINE_TYPE: ${{ matrix.machine_type }}
|
||||
RUN_SLOW: 1
|
||||
DS_BUILD_CPU_ADAM: 1
|
||||
DS_BUILD_FUSED_ADAM: 1
|
||||
MAX_JOBS: 4
|
||||
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /workspace/transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /workspace/transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Remove cached torch extensions
|
||||
run: rm -rf /github/home/.cache/torch_extensions/
|
||||
|
||||
# To avoid unknown test failures
|
||||
- name: Pre build DeepSpeed *again*
|
||||
working-directory: /workspace
|
||||
run: |
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /workspace/transformers
|
||||
run: pip freeze
|
||||
# TODO: uncomment this step after the move to A10 runners
|
||||
# - name: Install Flash Attention
|
||||
# run: |
|
||||
# pip install flash-attn --no-build-isolation
|
||||
|
||||
- name: Run integration tests
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
make run_integration_tests
|
||||
|
||||
- name: Run Flash Attention-2 tests
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
make run_flash_attn_tests
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: |
|
||||
cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_integration/failures_short.txt
|
||||
cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_flash_attention/failures_short.txt
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: run_integration_tests
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Tests status: ${{ needs.run_integration_tests.result }}"
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-integrations
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: transformers-ci-daily-integrations
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: scheduled
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
|
||||
SETUP_STATUS: success
|
||||
CI_SKIP_ERRORED_OUT: true
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py
|
205
.github/workflows/slow-pipeline-tests.yml
vendored
Normal file
205
.github/workflows/slow-pipeline-tests.yml
vendored
Normal file
@ -0,0 +1,205 @@
|
||||
name: "Self-hosted runner - SLOW pipeline tests (scheduled)"

# Dependencies of each job live in the Dockerfile of the image that job uses.
#
# Example: `run_all_tests_torch_cuda_extensions_gpu` uses the image
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, built from
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`.

on:
  repository_dispatch:
  schedule:
    - cron: '21 2 * * *'
  push:
    branches:
      - 'run_scheduled_ci*'
      - 'younes-test-workflow'

env:
  HF_HOME: "/mnt/cache"
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  # Gated repositories still require agreeing to share information on the
  # Hub repo page before access is granted.
  # This token was created under the bot account `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
  CUDA_VISIBLE_DEVICES: "0,1"
  NUM_SLICES: "2"
|
||||
|
||||
jobs:
|
||||
run_pipelines_torch_gpu:
  # Runs tests/pipelines inside the PyTorch image, once per machine type.
  name: "PyTorch pipelines"
  strategy:
    fail-fast: false
    matrix:
      machine_type:
        - single-gpu
        - multi-gpu
      # TODO: add also PT nightly here
      docker_image:
        - "huggingface/transformers-all-latest-gpu"
  runs-on:
    - "${{ matrix.machine_type }}"
    - nvidia-gpu
    - t4
    - daily-ci
  container:
    image: ${{ matrix.docker_image }}
    options: '--gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/'
  steps:
    # The image ships a clone at /transformers; move it to the commit under test.
    - name: "Update clone"
      working-directory: /transformers
      run: |
        git fetch && git checkout ${{ github.sha }}

    - name: "Reinstall transformers in edit mode (remove the one installed during docker image build)"
      working-directory: /transformers
      run: |
        python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: "NVIDIA-SMI"
      run: nvidia-smi

    - name: "Environment"
      working-directory: /transformers
      run: python3 utils/print_env.py

    - name: "Show installed libraries and their versions"
      working-directory: /transformers
      run: |
        pip freeze

    - name: "Run all pipeline tests on GPU"
      working-directory: /transformers
      run: python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines

    # Best effort: only shown after a failure, and never fails the job itself.
    - name: "Failure short reports"
      if: failure()
      continue-on-error: true
      run: |
        cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt

    - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: "${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
        path: "/transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu"
|
||||
|
||||
run_pipelines_tf_gpu:
  # Runs tests/pipelines inside the TensorFlow image, once per machine type.
  #
  # Unlike the PyTorch job, this job tests the `actions/checkout` working
  # copy (no `working-directory: /transformers`), so every path below is
  # relative to the checkout directory.
  name: TensorFlow pipelines
  strategy:
    fail-fast: false
    matrix:
      machine_type: [single-gpu, multi-gpu]
      docker_image: ["huggingface/transformers-tensorflow-gpu"]  # TODO: add also TF nightly here
  runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
  container:
    image: ${{ matrix.docker_image }}
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    - uses: actions/checkout@v3

    - name: Install locally and show env
      run: |
        pip install -e .
        python3 utils/print_env.py
        pip freeze

    - name: Run all pipeline tests on GPU
      run: |
        python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines

    # Only print the short report after a failure, and never let a missing
    # report file fail the job (same policy as the PyTorch pipeline job).
    # The sibling job runs pytest in /transformers and reads
    # /transformers/reports/..., i.e. reports land in `reports/` under the
    # directory pytest is started from; here that is the checkout directory.
    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: |
        cat reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt

    - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
      if: ${{ always() }}
      uses: actions/upload-artifact@v3
      with:
        name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
        # Relative to the checkout directory, where the tests were run.
        path: reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
|
||||
|
||||
|
||||
run_extract_warnings:
  # Collects the artifacts of both pipeline jobs and runs
  # utils/extract_warnings.py over them.
  name: "Extract warnings in CI artifacts"
  runs-on: ubuntu-22.04
  needs:
    - run_pipelines_tf_gpu
    - run_pipelines_torch_gpu
  # always(): run even when a pipeline job did not succeed.
  if: ${{ always() }}
  steps:
    - name: "Checkout transformers"
      uses: actions/checkout@v3
      with:
        fetch-depth: 2

    - name: "Install transformers"
      run: |
        pip install transformers

    - name: "Show installed libraries and their versions"
      run: |
        pip freeze

    - name: "Create output directory"
      run: |
        mkdir warnings_in_ci

    # Artifacts are downloaded into the directory created above.
    - uses: actions/download-artifact@v3
      with:
        path: warnings_in_ci

    - name: "Show artifacts"
      working-directory: warnings_in_ci
      run: |
        echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"

    # The second command prints the selected warnings, one per line.
    - name: "Extract warnings in CI artifacts"
      run: |
        python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
        echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"

    - name: "Upload artifact"
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: warnings_in_ci
        path: warnings_in_ci/selected_warnings.json
|
||||
|
||||
send_results:
  # Reporting job: prints the result of both pipeline jobs, downloads the
  # run's artifacts and hands over to utils/notification_service.py.
  name: "Send results to webhook"
  runs-on: ubuntu-22.04
  needs:
    - run_pipelines_tf_gpu
    - run_pipelines_torch_gpu
  # always(): still report when a pipeline job did not succeed.
  if: ${{ always() }}
  steps:
    - name: "Preliminary job status"
      shell: bash
      run: |
        echo "TF Pipeline tests status: ${{ needs.run_pipelines_tf_gpu.result }}"
        echo "Torch Pipeline tests status: ${{ needs.run_pipelines_torch_gpu.result }}"

    - uses: actions/checkout@v3

    - uses: actions/download-artifact@v3

    - name: "Send message to Slack"
      env:
        # Secrets.
        CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
        CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
        CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
        ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        # Slack channels used for this workflow's report.
        CI_SLACK_CHANNEL_ID_DAILY: "transformers-ci-daily-pipeline"
        CI_SLACK_REPORT_CHANNEL_ID: "transformers-ci-daily-pipeline"
        # Run metadata.
        CI_EVENT: "scheduled"
        CI_SHA: ${{ github.sha }}
        CI_WORKFLOW_REF: ${{ github.workflow_ref }}
        # This workflow has no setup job, so the status is fixed.
        SETUP_STATUS: "success"
        CI_SKIP_ERRORED_OUT: "true"
      run: |
        sudo apt-get install -y curl
        pip install slack_sdk
        pip show slack_sdk
        python utils/notification_service.py

    # The full failure tables can be large and Slack may only receive
    # truncated versions, so the complete ones are kept as an artifact.
    - name: "Failure table artifacts"
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: prev_ci_results
        path: prev_ci_results
|
166
.github/workflows/slow-quantization-tests.yml
vendored
Normal file
166
.github/workflows/slow-quantization-tests.yml
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
name: "Self-hosted runner - SLOW quantization tests (scheduled)"

# Dependencies of each job live in the Dockerfile of the image that job uses.
#
# Example: `run_all_tests_torch_cuda_extensions_gpu` uses the image
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, built from
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`.

on:
  repository_dispatch:
  schedule:
    - cron: '6 2 * * *'
  push:
    branches:
      - 'run_scheduled_ci*'

env:
  HF_HOME: "/mnt/cache"
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  # Gated repositories still require agreeing to share information on the
  # Hub repo page before access is granted.
  # This token was created under the bot account `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"
  CUDA_VISIBLE_DEVICES: "0,1"
  NUM_SLICES: "2"
|
||||
|
||||
jobs:
|
||||
run_tests_quantization_torch_gpu:
  # Runs tests/quantization inside the quantization image, once per machine
  # type.
  #
  # This job deliberately has no `needs:` entry: the workflow defines only
  # this job, `run_extract_warnings` and `send_results`, and a dependency on
  # a job id that does not exist makes the whole workflow invalid.
  name: Quantization tests
  strategy:
    fail-fast: false
    matrix:
      machine_type: [single-gpu, multi-gpu]
  runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
  container:
    image: huggingface/transformers-quantization-latest-gpu
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    # The steps below operate on the clone at /transformers inside the
    # container; move it to the commit under test first.
    - name: Update clone
      working-directory: /transformers
      run: git fetch && git checkout ${{ github.sha }}

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    - name: Run quantization tests on GPU
      working-directory: /transformers
      run: |
        python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization

    # Best effort: only shown after a failure, and never fails the job itself.
    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt

    - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
      if: ${{ always() }}
      uses: actions/upload-artifact@v3
      with:
        name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
        path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
|
||||
|
||||
|
||||
run_extract_warnings:
  # Collects the artifacts of the quantization job and runs
  # utils/extract_warnings.py over them.
  name: "Extract warnings in CI artifacts"
  runs-on: ubuntu-22.04
  needs:
    - run_tests_quantization_torch_gpu
  # always(): run even when the test job did not succeed.
  if: ${{ always() }}
  steps:
    - name: "Checkout transformers"
      uses: actions/checkout@v3
      with:
        fetch-depth: 2

    - name: "Install transformers"
      run: |
        pip install transformers

    - name: "Show installed libraries and their versions"
      run: |
        pip freeze

    - name: "Create output directory"
      run: |
        mkdir warnings_in_ci

    # Artifacts are downloaded into the directory created above.
    - uses: actions/download-artifact@v3
      with:
        path: warnings_in_ci

    - name: "Show artifacts"
      working-directory: warnings_in_ci
      run: |
        echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"

    # The second command prints the selected warnings, one per line.
    - name: "Extract warnings in CI artifacts"
      run: |
        python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
        echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"

    - name: "Upload artifact"
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: warnings_in_ci
        path: warnings_in_ci/selected_warnings.json
|
||||
|
||||
send_results:
  # Reporting job: prints the result of the quantization job, downloads the
  # run's artifacts and hands over to utils/notification_service.py.
  name: "Send results to webhook"
  runs-on: ubuntu-22.04
  needs:
    - run_tests_quantization_torch_gpu
  # always(): still report when the test job did not succeed.
  if: ${{ always() }}
  steps:
    - name: "Preliminary job status"
      shell: bash
      run: 'echo "Quantization tests status: ${{ needs.run_tests_quantization_torch_gpu.result }}"'

    - uses: actions/checkout@v3

    - uses: actions/download-artifact@v3

    - name: "Send message to Slack"
      env:
        # Secrets.
        CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
        CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
        CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
        ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        # Slack channels used for this workflow's report.
        CI_SLACK_CHANNEL_ID_DAILY: "transformers-ci-daily-quantization"
        CI_SLACK_REPORT_CHANNEL_ID: "transformers-ci-daily-quantization"
        # Run metadata.
        CI_EVENT: "scheduled"
        CI_SHA: ${{ github.sha }}
        CI_WORKFLOW_REF: ${{ github.workflow_ref }}
        # This workflow has no setup job, so the status is fixed.
        SETUP_STATUS: "success"
        CI_SKIP_ERRORED_OUT: "true"
      run: |
        sudo apt-get install -y curl
        pip install slack_sdk
        pip show slack_sdk
        python utils/notification_service.py

    # The full failure tables can be large and Slack may only receive
    # truncated versions, so the complete ones are kept as an artifact.
    - name: "Failure table artifacts"
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: prev_ci_results
        path: prev_ci_results
|
13
Makefile
13
Makefile
@ -6,6 +6,8 @@ export PYTHONPATH = src
|
||||
check_dirs := examples tests src utils
|
||||
|
||||
exclude_folders := examples/research_projects
|
||||
integration_tests_dir := tests/deepspeed tests/fsdp tests/peft_integration tests/trainer tests/extended tests/sagemaker
|
||||
generation_tests_dir := tests/generation
|
||||
|
||||
modified_only_fixup:
|
||||
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
|
||||
@ -92,15 +94,22 @@ test:
|
||||
python -m pytest -n auto --dist=loadfile -s -v ./tests/
|
||||
|
||||
# Run tests for examples
|
||||
|
||||
test-examples:
|
||||
python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
|
||||
python3 -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
|
||||
|
||||
# Run tests for SageMaker DLC release
|
||||
|
||||
test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker]
|
||||
TEST_SAGEMAKER=True python -m pytest -n auto -s -v ./tests/sagemaker
|
||||
|
||||
# Run the suites listed in $(integration_tests_dir); reports are tagged
# with MACHINE_TYPE (presumably exported by the calling CI job - confirm).
run_integration_tests:
	python3 -m pytest -v --make-reports=$(MACHINE_TYPE)_tests_integration $(integration_tests_dir)
|
||||
|
||||
# Run only the tests under tests/models/ selected by the `flash_attn_test`
# marker; reports are tagged with MACHINE_TYPE (presumably exported by the
# calling CI job - confirm).
run_flash_attn_tests:
	python3 -m pytest -v --make-reports=$(MACHINE_TYPE)_tests_flash_attention -m "flash_attn_test" tests/models/
|
||||
|
||||
# Run the suites listed in $(generation_tests_dir); reports are tagged
# with MACHINE_TYPE (presumably exported by the calling CI job - confirm).
run_generation_tests:
	python3 -m pytest -v --make-reports=$(MACHINE_TYPE)_tests_generation $(generation_tests_dir)
|
||||
|
||||
# Release stuff
|
||||
|
||||
|
@ -465,6 +465,9 @@ class Message:
|
||||
failures = {k: v["failed"] for k, v in self.additional_results.items()}
|
||||
errors = {k: v["error"] for k, v in self.additional_results.items()}
|
||||
|
||||
# Force-skip some tests for individual workflows.
|
||||
skip_errored_out_tests = os.environ.get("CI_SKIP_ERRORED_OUT", False)
|
||||
|
||||
individual_reports = []
|
||||
for key, value in failures.items():
|
||||
device_report = self.get_device_report(value)
|
||||
@ -476,7 +479,8 @@ class Message:
|
||||
if device_report:
|
||||
report = f"{device_report}{report}"
|
||||
|
||||
individual_reports.append(report)
|
||||
if (errors[key] and not skip_errored_out_tests) or device_report:
|
||||
individual_reports.append(report)
|
||||
|
||||
header = "Single | Multi | Category\n"
|
||||
failures_report = prepare_reports(
|
||||
@ -929,11 +933,18 @@ if __name__ == "__main__":
|
||||
Message.error_out(title, ci_title, runner_not_available, runner_failed, setup_failed)
|
||||
exit(0)
|
||||
|
||||
arguments = sys.argv[1:][0]
|
||||
arguments = None
|
||||
|
||||
if len(sys.argv) > 1:
|
||||
arguments = sys.argv[1:][0]
|
||||
|
||||
try:
|
||||
folder_slices = ast.literal_eval(arguments)
|
||||
# Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
|
||||
models = [x.replace("models/", "models_") for folders in folder_slices for x in folders]
|
||||
if arguments is not None:
|
||||
folder_slices = ast.literal_eval(arguments)
|
||||
# Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
|
||||
models = [x.replace("models/", "models_") for folders in folder_slices for x in folders]
|
||||
else:
|
||||
models = []
|
||||
except SyntaxError:
|
||||
Message.error_out(title, ci_title)
|
||||
raise ValueError("Errored out.")
|
||||
@ -1043,6 +1054,7 @@ if __name__ == "__main__":
|
||||
"PyTorch pipelines": "run_tests_torch_pipeline_gpu",
|
||||
"TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
|
||||
"Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
|
||||
"Integration tests": "run_integration_tests",
|
||||
"Quantization tests": "run_tests_quantization_torch_gpu",
|
||||
}
|
||||
|
||||
|
@ -34,6 +34,7 @@ python ../utils/split_model_tests.py --num_splits 64
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from glob import glob
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@ -46,12 +47,13 @@ if __name__ == "__main__":
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
tests = os.getcwd()
|
||||
model_tests = os.listdir(os.path.join(tests, "models"))
|
||||
d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
|
||||
d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))
|
||||
d1.remove("models")
|
||||
d = d2 + d1
|
||||
tests_dir = os.path.join(os.getcwd(), "tests")
|
||||
model_tests_dir = os.path.join(tests_dir, "models")
|
||||
|
||||
model_tests_subfolders = glob(os.path.join(model_tests_dir, "*"))
|
||||
|
||||
d = sorted(filter(os.path.isdir, model_tests_subfolders))
|
||||
d = ["/".join(sub_directory.split("/")[-2:]) for sub_directory in d]
|
||||
|
||||
num_jobs = len(d)
|
||||
num_jobs_per_splits = num_jobs // args.num_splits
|
||||
|
Reference in New Issue
Block a user