Compare commits

...

44 Commits

Author SHA1 Message Date
cd9523ca90 test something new 2024-02-29 09:27:54 +00:00
a79e6848e0 revert unneeded changes in notif service 2024-02-29 08:40:23 +00:00
6a5042c42a fix 2024-02-29 08:28:29 +00:00
c57b198910 fix deepsped + notification service 2024-02-29 08:17:16 +00:00
355291d0f4 multiple fixes, correct working-dir and better notification_service manegement 2024-02-29 08:01:59 +00:00
c47c2fc90a remove the need of having setup 2024-02-29 07:49:07 +00:00
67fd8ba561 fix again 2024-02-29 07:33:19 +00:00
f3d8bcf5e8 multple fixes 2024-02-29 07:24:21 +00:00
250528b058 test 2024-02-29 07:12:41 +00:00
0e5b1db636 Merge remote-tracking branch 'upstream/main' into younes-test-workflow 2024-02-29 07:09:23 +00:00
a7bec5efe7 new changes 2024-02-29 02:10:24 +00:00
1e59183af4 Merge branch 'main' into younes-test-workflow 2024-02-29 03:06:48 +01:00
bb41c9b212 revert 2024-02-23 06:12:42 +00:00
28a663eb89 revert to daily-ci 2024-02-23 06:10:08 +00:00
40efe01020 final fix 2024-02-23 06:07:44 +00:00
2ac50a1898 ultimate fix 2024-02-23 05:55:29 +00:00
6e993f4390 another fix 2024-02-23 05:52:53 +00:00
f16597da92 fix 2024-02-23 05:51:27 +00:00
e0f11e9fb2 few fixes 2024-02-23 05:49:01 +00:00
a00d4cd8fb fix indentation 2024-02-22 06:42:51 +00:00
953e016288 update 2024-02-22 06:41:13 +00:00
d7e9dbb078 more changes 2024-02-22 06:22:42 +00:00
a2557f0884 change channel 2024-02-22 06:06:19 +00:00
c93750fb0e faster setup 2024-02-22 05:34:37 +00:00
a6d7a0222b fix 2024-02-22 05:31:55 +00:00
82eb9204c7 oops 2024-02-22 05:24:23 +00:00
1603a7618b simply more 2024-02-22 05:23:21 +00:00
8303b58544 Update self-scheduled.yml 2024-02-22 06:14:43 +01:00
5564dac04f Update self-scheduled.yml 2024-02-22 06:02:02 +01:00
031747b677 test on 2 models 2024-02-22 04:57:39 +00:00
60bd8c1e0b test 2024-02-22 04:55:19 +00:00
dfbf156a40 more splits 2024-02-22 04:53:26 +00:00
d1cc176e22 format 2024-02-22 04:12:24 +00:00
3cfc9f6cbc fix 2024-02-22 04:05:24 +00:00
bd8c61a123 fix 2024-02-22 04:01:45 +00:00
13c798ba4e nit 2024-02-22 03:59:13 +00:00
7e91063faa left some todos 2024-02-22 03:59:00 +00:00
3260b0a623 some fixes 2024-02-22 03:58:20 +00:00
521f265abc fix path 2024-02-22 03:40:59 +00:00
a783353e31 fix 2024-02-22 03:39:16 +00:00
1f96df0efc change splitter 2024-02-22 03:35:09 +00:00
8ba0dda329 small refactor 2024-02-22 03:34:12 +00:00
80335b3739 make use of glob 2024-02-22 02:54:27 +00:00
e4c324bd95 small refactor 2024-02-22 02:51:37 +00:00
9 changed files with 870 additions and 315 deletions

View File

@ -27,51 +27,37 @@ env:
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
NUM_SLICES: 128
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-models
jobs:
setup:
name: Setup
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
runs-on: ubuntu-latest
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
steps:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- uses: actions/checkout@v3
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- id: set-matrix
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "folder_slices=$(python3 ./utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- name: NVIDIA-SMI
run: |
nvidia-smi
run_tests_gpu:
name: " "
run_model_tests_gpu:
name: "Run model tests"
needs: setup
strategy:
fail-fast: false
@ -85,280 +71,13 @@ jobs:
slice_id: ${{ matrix.slice_id }}
secrets: inherit
run_examples_gpu:
name: Examples directory
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
run_pipelines_torch_gpu:
name: PyTorch pipelines
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-pytorch-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
run_pipelines_tf_gpu:
name: TensorFlow pipelines
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-tensorflow-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
- name: Failure short reports
if: ${{ always() }}
run: |
cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
run_all_tests_torch_cuda_extensions_gpu:
name: Torch CUDA extension tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
needs: setup
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /workspace/transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /workspace/transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/
# To avoid unknown test failures
- name: Pre build DeepSpeed *again*
working-directory: /workspace
run: |
python3 -m pip uninstall -y deepspeed
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /workspace/transformers
run: |
python utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /workspace/transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /workspace/transformers
run: |
python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
run_tests_quantization_torch_gpu:
name: Quantization tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
run_extract_warnings:
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: [
setup,
run_tests_gpu,
run_examples_gpu,
run_pipelines_tf_gpu,
run_pipelines_torch_gpu,
run_all_tests_torch_cuda_extensions_gpu,
run_tests_quantization_torch_gpu,
run_model_tests_gpu,
]
steps:
- name: Checkout transformers
@ -401,12 +120,7 @@ jobs:
if: always()
needs: [
setup,
run_tests_gpu,
run_examples_gpu,
run_pipelines_tf_gpu,
run_pipelines_torch_gpu,
run_all_tests_torch_cuda_extensions_gpu,
run_tests_quantization_torch_gpu,
run_model_tests_gpu,
run_extract_warnings
]
steps:
@ -422,11 +136,10 @@ jobs:
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ env.CI_SLACK_CHANNEL_ID_DAILY }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled
CI_EVENT: scheduled-models
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: ${{ needs.setup.result }}

170
.github/workflows/slow-examples-test.yml vendored Normal file
View File

@ -0,0 +1,170 @@
name: Self-hosted runner - SLOW Example tests (scheduled)
# Note that each job's dependencies go into a corresponding docker file.
#
# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
on:
repository_dispatch:
schedule:
- cron: "21 2 * * *"
push:
branches:
- run_scheduled_ci*
- younes-test-workflow
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-examples
jobs:
run_examples_gpu:
name: Examples directory
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_examples_gpu examples/pytorch
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu
run_extract_warnings:
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: [
run_examples_gpu,
]
steps:
- name: Checkout transformers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install transformers
run: pip install transformers
- name: Show installed libraries and their versions
run: pip freeze
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v3
with:
path: warnings_in_ci
- name: Show artifacts
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
working-directory: warnings_in_ci
- name: Extract warnings in CI artifacts
run: |
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
run_examples_gpu,
run_extract_warnings
]
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Setup status: ${{ needs.setup.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ env.CI_SLACK_CHANNEL_ID_DAILY }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled-examples
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: success # No need for setup status here
CI_SKIP_ERRORED_OUT: true
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: prev_ci_results
path: prev_ci_results

View File

@ -0,0 +1,165 @@
name: Self-hosted runner - SLOW Generation tests (scheduled)
# Note that each job's dependencies go into a corresponding docker file.
#
# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
on:
repository_dispatch:
schedule:
- cron: "21 2 * * *"
push:
branches:
- run_scheduled_ci*
- younes-test-workflow
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-generation
jobs:
run_generation_tests:
name: PyTorch Generation tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
docker_image: ["huggingface/transformers-all-latest-gpu"] # TODO: add also PT nightly here
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci]
container:
image: ${{ matrix.docker_image }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
MACHINE_TYPE: ${{ matrix.machine_type }}
RUN_SLOW: 1
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all Generation tests on GPU
working-directory: /transformers
run: |
make run_generation_tests
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_generation_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_generation_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_generation_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_generation_gpu
run_extract_warnings:
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: run_generation_tests
steps:
- name: Checkout transformers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install transformers
run: pip install transformers
- name: Show installed libraries and their versions
run: pip freeze
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v3
with:
path: warnings_in_ci
- name: Show artifacts
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
working-directory: warnings_in_ci
- name: Extract warnings in CI artifacts
run: |
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: run_generation_tests
steps:
- name: Preliminary job status
shell: bash
run: |
echo "Test status: ${{ needs.run_generation_tests.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ env.CI_SLACK_CHANNEL_ID_DAILY }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled-generation
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: success # No need for setup status here
CI_SKIP_ERRORED_OUT: true
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: prev_ci_results
path: prev_ci_results

View File

@ -0,0 +1,113 @@
name: Self-hosted runner - SLOW integration tests (DeepSpeed, PEFT, FA2, etc.) (scheduled)
on:
repository_dispatch:
schedule:
- cron: "20 5 * * *"
push:
branches:
- run_scheduled_ci*
- younes-test-workflow
jobs:
run_integration_tests:
name: Run all integration tests
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, ci] # TODO: move to A10
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
MACHINE_TYPE: ${{ matrix.machine_type }}
RUN_SLOW: 1
DS_BUILD_CPU_ADAM: 1
DS_BUILD_FUSED_ADAM: 1
MAX_JOBS: 4
steps:
- name: Update clone
working-directory: /workspace/transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /workspace/transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/
# To avoid unknown test failures
- name: Pre build DeepSpeed *again*
working-directory: /workspace
run: |
python3 -m pip uninstall -y deepspeed
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /workspace/transformers
run: |
python utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /workspace/transformers
run: pip freeze
# TODO: uncomment that after A10
# - name: Install Flash Attention
# run: |
# pip install flash-attn --no-build-isolation
- name: Run integration tests
working-directory: /workspace/transformers
run: |
make run_integration_tests
- name: Run Flash Attention-2 tests
working-directory: /workspace/transformers
run: |
make run_flash_attn_tests
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: |
cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_integration/failures_short.txt
cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_flash_attention/failures_short.txt
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: run_integration_tests
steps:
- name: Preliminary job status
shell: bash
run: |
echo "Tests status: ${{ needs.run_integration_tests.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-integrations
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: transformers-ci-daily-integrations
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: success
CI_SKIP_ERRORED_OUT: true
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py

View File

@ -0,0 +1,205 @@
name: Self-hosted runner - SLOW pipeline tests (scheduled)
# Note that each job's dependencies go into a corresponding docker file.
#
# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
on:
repository_dispatch:
schedule:
- cron: "21 2 * * *"
push:
branches:
- run_scheduled_ci*
- younes-test-workflow
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
jobs:
run_pipelines_torch_gpu:
name: PyTorch pipelines
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
docker_image: ["huggingface/transformers-all-latest-gpu"] # TODO: add also PT nightly here
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: ${{ matrix.docker_image }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
run_pipelines_tf_gpu:
name: TensorFlow pipelines
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
docker_image: ["huggingface/transformers-tensorflow-gpu"] # TODO: add also PT nightly here
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: ${{ matrix.docker_image }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- uses: actions/checkout@v3
- name: Install locally and show env
run: |
pip install -e .
python3 utils/print_env.py
pip freeze
- name: Run all pipeline tests on GPU
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
- name: Failure short reports
if: ${{ always() }}
run: |
cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
run_extract_warnings:
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: [
run_pipelines_tf_gpu,
run_pipelines_torch_gpu,
]
steps:
- name: Checkout transformers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install transformers
run: pip install transformers
- name: Show installed libraries and their versions
run: pip freeze
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v3
with:
path: warnings_in_ci
- name: Show artifacts
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
working-directory: warnings_in_ci
- name: Extract warnings in CI artifacts
run: |
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
run_pipelines_tf_gpu,
run_pipelines_torch_gpu,
]
steps:
- name: Preliminary job status
shell: bash
run: |
echo "TF Pipeline tests status: ${{ needs.run_pipelines_tf_gpu.result }}"
echo "Torch Pipeline tests status: ${{ needs.run_pipelines_torch_gpu.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-pipeline
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: transformers-ci-daily-pipeline
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: success # No need for setup status here
CI_SKIP_ERRORED_OUT: true
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: prev_ci_results
path: prev_ci_results

View File

@ -0,0 +1,166 @@
name: Self-hosted runner - SLOW quantization tests (scheduled)
# Note that each job's dependencies go into a corresponding docker file.
#
# For example for `run_all_tests_torch_cuda_extensions_gpu` the docker image is
# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at
# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile`
on:
repository_dispatch:
schedule:
- cron: "6 2 * * *"
push:
branches:
- run_scheduled_ci*
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
jobs:
run_tests_quantization_torch_gpu:
name: Quantization tests
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
needs: setup
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
run_extract_warnings:
name: Extract warnings in CI artifacts
runs-on: ubuntu-22.04
if: always()
needs: [
run_tests_quantization_torch_gpu,
]
steps:
- name: Checkout transformers
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install transformers
run: pip install transformers
- name: Show installed libraries and their versions
run: pip freeze
- name: Create output directory
run: mkdir warnings_in_ci
- uses: actions/download-artifact@v3
with:
path: warnings_in_ci
- name: Show artifacts
run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
working-directory: warnings_in_ci
- name: Extract warnings in CI artifacts
run: |
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
- name: Upload artifact
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: warnings_in_ci
path: warnings_in_ci/selected_warnings.json
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
run_tests_quantization_torch_gpu
]
steps:
- name: Preliminary job status
shell: bash
run: |
echo "Quantization tests status: ${{ needs.run_tests_quantization_torch_gpu.result }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: transformers-ci-daily-quantization
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: transformers-ci-daily-quantization
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
CI_WORKFLOW_REF: ${{ github.workflow_ref }}
SETUP_STATUS: success # No need for setup status here
CI_SKIP_ERRORED_OUT: true
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: prev_ci_results
path: prev_ci_results

View File

@ -6,6 +6,8 @@ export PYTHONPATH = src
check_dirs := examples tests src utils
exclude_folders := examples/research_projects
integration_tests_dir := tests/deepspeed tests/fsdp tests/peft_integration tests/trainer tests/extended tests/sagemaker
generation_tests_dir := tests/generation
modified_only_fixup:
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@ -92,15 +94,22 @@ test:
python -m pytest -n auto --dist=loadfile -s -v ./tests/
# Run tests for examples
test-examples:
python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
python3 -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
# Run tests for SageMaker DLC release
test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker]
TEST_SAGEMAKER=True python -m pytest -n auto -s -v ./tests/sagemaker
run_integration_tests:
python3 -m pytest -v --make-reports=${MACHINE_TYPE}_tests_integration $(integration_tests_dir)
run_flash_attn_tests:
python3 -m pytest -v --make-reports=${MACHINE_TYPE}_tests_flash_attention -m "flash_attn_test" tests/models/
run_generation_tests:
python3 -m pytest -v --make-reports=${MACHINE_TYPE}_tests_generation $(generation_tests_dir)
# Release stuff

View File

@ -465,6 +465,9 @@ class Message:
failures = {k: v["failed"] for k, v in self.additional_results.items()}
errors = {k: v["error"] for k, v in self.additional_results.items()}
# Force-skip some tests for inidivdual workflows.
skip_errored_out_tests = os.environ.get("CI_SKIP_ERRORED_OUT", False)
individual_reports = []
for key, value in failures.items():
device_report = self.get_device_report(value)
@ -476,7 +479,8 @@ class Message:
if device_report:
report = f"{device_report}{report}"
individual_reports.append(report)
if (errors[key] and not skip_errored_out_tests) or device_report:
individual_reports.append(report)
header = "Single | Multi | Category\n"
failures_report = prepare_reports(
@ -929,11 +933,18 @@ if __name__ == "__main__":
Message.error_out(title, ci_title, runner_not_available, runner_failed, setup_failed)
exit(0)
arguments = sys.argv[1:][0]
arguments = None
if len(sys.argv) > 1:
arguments = sys.argv[1:][0]
try:
folder_slices = ast.literal_eval(arguments)
# Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
models = [x.replace("models/", "models_") for folders in folder_slices for x in folders]
if arguments is not None:
folder_slices = ast.literal_eval(arguments)
# Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
models = [x.replace("models/", "models_") for folders in folder_slices for x in folders]
else:
models = []
except SyntaxError:
Message.error_out(title, ci_title)
raise ValueError("Errored out.")
@ -1043,6 +1054,7 @@ if __name__ == "__main__":
"PyTorch pipelines": "run_tests_torch_pipeline_gpu",
"TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
"Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
"Integration tests": "run_integration_tests",
"Quantization tests": "run_tests_quantization_torch_gpu",
}

View File

@ -34,6 +34,7 @@ python ../utils/split_model_tests.py --num_splits 64
import argparse
import os
from glob import glob
if __name__ == "__main__":
@ -46,12 +47,13 @@ if __name__ == "__main__":
)
args = parser.parse_args()
tests = os.getcwd()
model_tests = os.listdir(os.path.join(tests, "models"))
d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))
d1.remove("models")
d = d2 + d1
tests_dir = os.path.join(os.getcwd(), "tests")
model_tests_dir = os.path.join(tests_dir, "models")
model_tests_subfolders = glob(os.path.join(model_tests_dir, "*"))
d = sorted(filter(os.path.isdir, model_tests_subfolders))
d = ["/".join(sub_directory.split("/")[-2:]) for sub_directory in d]
num_jobs = len(d)
num_jobs_per_splits = num_jobs // args.num_splits