mirror of
https://github.com/huggingface/transformers.git
synced 2025-11-14 22:55:41 +08:00
Compare commits
6 Commits
better-ini
...
rm_last_ke
| Author | SHA1 | Date | |
|---|---|---|---|
| 9939e585c0 | |||
| e7d8a450d9 | |||
| 3df8123844 | |||
| 5fed89ac13 | |||
| a8820d1903 | |||
| ccbfcc1361 |
@ -46,8 +46,8 @@ jobs:
|
||||
- run: uv pip install -U -e .
|
||||
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
|
||||
- run: mkdir -p test_preparation
|
||||
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt || true
|
||||
- run: python utils/tests_fetcher.py --filter_tests || true
|
||||
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
|
||||
- run: python utils/tests_fetcher.py --filter_tests
|
||||
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
|
||||
- run: |
|
||||
if [ ! -s test_preparation/generated_config.yml ]; then
|
||||
@ -98,8 +98,8 @@ jobs:
|
||||
- run: uv pip install -U -e .
|
||||
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
|
||||
- run: mkdir -p test_preparation
|
||||
- run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt || true
|
||||
- run: python utils/tests_fetcher.py --filter_tests || true
|
||||
- run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
|
||||
- run: python utils/tests_fetcher.py --filter_tests
|
||||
- run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation
|
||||
- run: |
|
||||
if [ ! -s test_preparation/generated_config.yml ]; then
|
||||
|
||||
@ -185,8 +185,8 @@ class CircleCIJob:
|
||||
},
|
||||
# During the CircleCI docker images build time, we might already (or not) download the data.
|
||||
# If it's done already, the files are inside the directory `/test_data/`.
|
||||
# {"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
|
||||
# {"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}},
|
||||
{"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
|
||||
{"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}},
|
||||
{"run": {
|
||||
"name": "Run tests",
|
||||
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
|
||||
|
||||
1
.github/scripts/codeowners_for_review_action
vendored
1
.github/scripts/codeowners_for_review_action
vendored
@ -22,6 +22,7 @@ tests/generation/ @gante
|
||||
/src/transformers/models/auto/ @ArthurZucker
|
||||
/src/transformers/utils/ @ArthurZucker @Rocketknight1
|
||||
/src/transformers/loss/ @ArthurZucker
|
||||
/src/transformers/onnx/ @michaelbenayoun
|
||||
|
||||
# Specific files come after the sections/globs, so they take priority
|
||||
/.circleci/config.yml @ArthurZucker @ydshieh
|
||||
|
||||
10
.github/workflows/benchmark.yml
vendored
10
.github/workflows/benchmark.yml
vendored
@ -28,20 +28,20 @@ jobs:
|
||||
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
|
||||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
image: huggingface/transformers-pytorch-gpu
|
||||
options: --gpus all --privileged --ipc host
|
||||
steps:
|
||||
- name: Get repo
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
- name: Install benchmark script dependencies
|
||||
run: python3 -m pip install -r benchmark_v2/requirements.txt kernels
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]" && python3 -m pip uninstall -y torchvision # temp fix
|
||||
|
||||
- name: Run benchmark
|
||||
run: |
|
||||
@ -52,7 +52,7 @@ jobs:
|
||||
commit_id=$GITHUB_SHA
|
||||
fi
|
||||
commit_msg=$(git show -s --format=%s | cut -c1-70)
|
||||
python3 benchmark_v2/run_benchmarks.py -b 32 -s 128 -n 256 --level 2 --branch-name "$BRANCH_NAME" --commit-id "$commit_id" --commit-message "$commit_msg" --model-id "$MODEL_ID" --log-level INFO --push-result-to-dataset "$DATASET_ID"
|
||||
python3 benchmark_v2/run_benchmarks.py -b 32 -s 128 -n 256 --branch-name "$BRANCH_NAME" --commit-id "$commit_id" --commit-message "$commit_msg" --model-id "$MODEL_ID" --log-level INFO --push-result-to-dataset "$DATASET_ID"
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
PUSH_TO_HUB_TOKEN: ${{ secrets.PUSH_TO_HUB_TOKEN }}
|
||||
|
||||
@ -9,7 +9,7 @@ jobs:
|
||||
uses: ./.github/workflows/benchmark_v2.yml
|
||||
with:
|
||||
runner: aws-g5-4xlarge-cache-use1-public-80
|
||||
container_image: huggingface/transformers-all-latest-gpu
|
||||
container_image: huggingface/transformers-pytorch-gpu
|
||||
container_options: --gpus all --privileged --ipc host --shm-size "16gb"
|
||||
commit_sha: ${{ github.sha }}
|
||||
run_id: ${{ github.run_id }}
|
||||
|
||||
184
.github/workflows/build-docker-images.yml
vendored
184
.github/workflows/build-docker-images.yml
vendored
@ -45,59 +45,33 @@ jobs:
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-all-latest-gpu docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
flash-attn-ci-image:
|
||||
name: "PyTorch with Flash Attn [dev]"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-all-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
PYTORCH=2.8.0
|
||||
TORCHCODEC=0.7.0
|
||||
FLASH_ATTN=yes
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}:flash-attn
|
||||
tags: huggingface/transformers-all-latest-gpu-push-ci
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-all-latest-gpu docker build
|
||||
title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-torch-deepspeed-docker:
|
||||
name: "Latest PyTorch + DeepSpeed"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
group: aws-g4dn-2xlarge-cache
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
@ -130,8 +104,51 @@ jobs:
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
|
||||
latest-torch-deepspeed-docker-for-push-ci-daily-build:
|
||||
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
doc-builder:
|
||||
name: "Doc builder"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
@ -164,6 +181,44 @@ jobs:
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-pytorch:
|
||||
name: "Latest PyTorch [dev]"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-gpu
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
latest-pytorch-amd:
|
||||
name: "Latest PyTorch (AMD) [dev]"
|
||||
runs-on:
|
||||
@ -190,47 +245,29 @@ jobs:
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-amd-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-amd-gpu-push-ci
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu build
|
||||
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
|
||||
status: ${{ job.status }}
|
||||
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
|
||||
cache-latest-pytorch-amd:
|
||||
name: "Cache Latest Pytorch (AMD) Image"
|
||||
needs: latest-pytorch-amd
|
||||
runs-on:
|
||||
group: amd-mi325-1gpu
|
||||
steps:
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
|
||||
-
|
||||
name: Pull and save docker image to cache
|
||||
run: |
|
||||
image="huggingface/transformers-pytorch-amd-gpu"
|
||||
final_path="/mnt/image-cache/transformers-pytorch-amd-gpu.tar"
|
||||
tmp_path="${final_path}.tmp"
|
||||
|
||||
echo "Pulling image: ${image}"
|
||||
docker pull "${image}"
|
||||
|
||||
echo "Saving to temp file: ${tmp_path}"
|
||||
docker save "${image}" -o "${tmp_path}"
|
||||
|
||||
echo "Moving to final path: ${final_path}"
|
||||
mv -f "${tmp_path}" "${final_path}"
|
||||
|
||||
echo "Cache populated successfully at ${final_path}"
|
||||
|
||||
latest-pytorch-deepspeed-amd:
|
||||
name: "PyTorch + DeepSpeed (AMD) [dev]"
|
||||
runs-on:
|
||||
@ -257,6 +294,19 @@ jobs:
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
|
||||
# Push CI images still need to be re-built daily
|
||||
-
|
||||
name: Build and push (for Push CI) in a daily basis
|
||||
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
|
||||
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
@ -269,6 +319,8 @@ jobs:
|
||||
|
||||
latest-quantization-torch-docker:
|
||||
name: "Latest Pytorch + Quantization [dev]"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on:
|
||||
group: aws-general-8-plus
|
||||
steps:
|
||||
|
||||
23
.github/workflows/check-workflow-permissions.yml
vendored
23
.github/workflows/check-workflow-permissions.yml
vendored
@ -1,23 +0,0 @@
|
||||
---
|
||||
name: Check Permissions Advisor
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
workflow_name:
|
||||
description: 'Workflow file name'
|
||||
type: string
|
||||
run_count:
|
||||
description: 'Number of runs to analyze'
|
||||
type: string
|
||||
default: "10"
|
||||
|
||||
jobs:
|
||||
advisor:
|
||||
uses: huggingface/security-workflows/.github/workflows/permissions-advisor-reusable.yml@main
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
with:
|
||||
workflow_name: ${{ inputs.workflow_name }}
|
||||
run_count: ${{ fromJSON(inputs.run_count) }}
|
||||
142
.github/workflows/check_failed_tests.yml
vendored
142
.github/workflows/check_failed_tests.yml
vendored
@ -6,6 +6,9 @@ on:
|
||||
docker:
|
||||
required: true
|
||||
type: string
|
||||
start_sha:
|
||||
required: true
|
||||
type: string
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
@ -21,13 +24,7 @@ on:
|
||||
commit_sha:
|
||||
required: false
|
||||
type: string
|
||||
pr_number:
|
||||
required: false
|
||||
type: string
|
||||
outputs:
|
||||
report:
|
||||
description: "Content of the report of new failures"
|
||||
value: ${{ jobs.process_new_failures_with_commit_info.outputs.report }}
|
||||
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@ -64,15 +61,13 @@ jobs:
|
||||
- name: Check file
|
||||
id: check_file
|
||||
working-directory: /transformers
|
||||
env:
|
||||
job: ${{ inputs.job }}
|
||||
run: |
|
||||
if [ -f "ci_results_${job}/new_failures.json" ]; then
|
||||
echo "\`ci_results_${job}/new_failures.json\` exists, continue ..."
|
||||
if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
|
||||
echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
|
||||
echo "process=true" >> $GITHUB_ENV
|
||||
echo "process=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "\`ci_results_${job}/new_failures.json\` doesn't exist, abort."
|
||||
echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
|
||||
echo "process=false" >> $GITHUB_ENV
|
||||
echo "process=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
@ -93,62 +88,27 @@ jobs:
|
||||
echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
if [ -f setup_values/other_workflow_run_id.txt ]; then
|
||||
echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
|
||||
else
|
||||
echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: |
|
||||
git fetch origin "$commit_sha" && git checkout "$commit_sha"
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Get `START_SHA`
|
||||
- name: Get target commit
|
||||
working-directory: /transformers/utils
|
||||
if: ${{ env.process == 'true' }}
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: |
|
||||
echo "START_SHA=$commit_sha" >> $GITHUB_ENV
|
||||
echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
|
||||
|
||||
# This is used if the CI is triggered from a pull request `self-comment-ci.yml` (after security check is verified)
|
||||
- name: Extract the base commit on `main` (of the merge commit created by Github) if it is a PR
|
||||
id: pr_info
|
||||
if: ${{ env.process == 'true' && inputs.pr_number != '' }}
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: ${{ inputs.pr_number }}
|
||||
});
|
||||
|
||||
const { data: merge_commit } = await github.rest.repos.getCommit({
|
||||
owner: pr.base.repo.owner.login,
|
||||
repo: pr.base.repo.name,
|
||||
ref: '${{ inputs.commit_sha }}',
|
||||
});
|
||||
|
||||
core.setOutput('merge_commit_base_sha', merge_commit.parents[0].sha);
|
||||
|
||||
# Usually, `END_SHA` should be the commit of the last previous workflow run of the **SAME** (scheduled) workflow.
|
||||
# (This is why we don't need to specify `workflow_id` which would be fetched automatically in the python script.)
|
||||
- name: Get `END_SHA` from previous CI runs of the same workflow
|
||||
working-directory: /transformers/utils
|
||||
if: ${{ env.process == 'true' && inputs.pr_number == '' }}
|
||||
env:
|
||||
ACCESS_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
run: |
|
||||
echo "END_SHA=$(TOKEN="$ACCESS_TOKEN" python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
|
||||
|
||||
# However, for workflow runs triggered by `issue_comment` (for pull requests), we want to check against the
|
||||
# parent commit (on `main`) of the `merge_commit` (dynamically created by GitHub). In this case, the goal is to
|
||||
# see if a reported failing test is actually ONLY failing on the `merge_commit`.
|
||||
- name: Set `END_SHA`
|
||||
if: ${{ env.process == 'true' && inputs.pr_number != '' }}
|
||||
env:
|
||||
merge_commit_base_sha: ${{ steps.pr_info.outputs.merge_commit_base_sha }}
|
||||
run: |
|
||||
echo "END_SHA=$merge_commit_base_sha" >> $GITHUB_ENV
|
||||
- name: Checkout to `start_sha`
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: git fetch && git checkout ${{ inputs.start_sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@ -178,20 +138,14 @@ jobs:
|
||||
- name: Check failed tests
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
env:
|
||||
job: ${{ inputs.job }}
|
||||
run_idx: ${{ matrix.run_idx }}
|
||||
run: python3 utils/check_bad_commit.py --start_commit "$START_SHA" --end_commit "$END_SHA" --file "ci_results_${job}/new_failures.json" --output_file "new_failures_with_bad_commit_${job}_${run_idx}.json"
|
||||
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
|
||||
|
||||
- name: Show results
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
env:
|
||||
job: ${{ inputs.job }}
|
||||
run_idx: ${{ matrix.run_idx }}
|
||||
run: |
|
||||
ls -l "new_failures_with_bad_commit_${job}_${run_idx}.json"
|
||||
cat "new_failures_with_bad_commit_${job}_${run_idx}.json"
|
||||
ls -l new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
|
||||
cat new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
@ -205,8 +159,6 @@ jobs:
|
||||
if: needs.check_new_failures.outputs.process == 'true'
|
||||
runs-on:
|
||||
group: aws-g5-4xlarge-cache
|
||||
outputs:
|
||||
report: ${{ steps.set_output.outputs.report }}
|
||||
container:
|
||||
image: ${{ inputs.docker }}
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
@ -224,28 +176,32 @@ jobs:
|
||||
|
||||
- name: Check files
|
||||
working-directory: /transformers
|
||||
env:
|
||||
job: ${{ inputs.job }}
|
||||
run: |
|
||||
ls -la /transformers
|
||||
ls -la "/transformers/new_failures_with_bad_commit_${job}"
|
||||
ls -la /transformers/new_failures_with_bad_commit_${{ inputs.job }}
|
||||
|
||||
# Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
|
||||
# to further reduce the false positive caused by flaky tests, which requires further processing to merge reports.
|
||||
- name: Merge files
|
||||
shell: bash
|
||||
working-directory: /transformers
|
||||
env:
|
||||
job: ${{ inputs.job }}
|
||||
run: |
|
||||
cp "/transformers/new_failures_with_bad_commit_${job}/new_failures_with_bad_commit_${job}_1.json" new_failures_with_bad_commit.json
|
||||
cp /transformers/new_failures_with_bad_commit_${{ inputs.job }}/new_failures_with_bad_commit_${{ inputs.job }}_1.json new_failures_with_bad_commit.json
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Process report
|
||||
shell: bash
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
JOB_NAME: ${{ inputs.job }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
run: |
|
||||
git fetch origin "$commit_sha" && git checkout "$commit_sha"
|
||||
python3 utils/process_bad_commit_report.py
|
||||
|
||||
- name: Process report
|
||||
shell: bash
|
||||
@ -262,37 +218,11 @@ jobs:
|
||||
echo EOF
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
# The output is useful if a caller needs more processing, for example, we have a chain
|
||||
# self-comment-ci.yml -> self-scheduled.yml -> this one (check_failed_tests.yml),
|
||||
# and `self-comment-ci.yml` needs further processing before sending a GitHub comment to the pull request page.
|
||||
- name: Show results & Set outputs
|
||||
id: set_output
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
ls -l new_failures_with_bad_commit.json
|
||||
cat new_failures_with_bad_commit.json
|
||||
|
||||
{
|
||||
echo 'report<<EOF'
|
||||
cat new_failures_with_bad_commit.json
|
||||
echo '' # Force a newline
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: new_failures_with_bad_commit_${{ inputs.job }}
|
||||
path: /transformers/new_failures_with_bad_commit.json
|
||||
|
||||
- name: Prepare Slack report title
|
||||
working-directory: /transformers
|
||||
env:
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
job: ${{ inputs.job }}
|
||||
run: |
|
||||
pip install slack_sdk
|
||||
echo "title=$(python3 -c 'import sys; import os; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = os.environ["ci_event"]; job = os.environ["job"]; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
|
||||
echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
|
||||
|
||||
- name: Send processed report
|
||||
if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
|
||||
|
||||
22
.github/workflows/codeql.yml
vendored
22
.github/workflows/codeql.yml
vendored
@ -1,22 +0,0 @@
|
||||
---
|
||||
name: CodeQL Security Analysis
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main", "fix_security_issue_*"]
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
codeql:
|
||||
name: CodeQL Analysis
|
||||
uses: huggingface/security-workflows/.github/workflows/codeql-reusable.yml@main
|
||||
permissions:
|
||||
security-events: write
|
||||
packages: read
|
||||
actions: read
|
||||
contents: read
|
||||
with:
|
||||
languages: '["actions"]'
|
||||
queries: 'security-extended,security-and-quality'
|
||||
24
.github/workflows/get-pr-info.yml
vendored
24
.github/workflows/get-pr-info.yml
vendored
@ -39,9 +39,6 @@ on:
|
||||
PR_MERGE_COMMIT_SHA:
|
||||
description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
|
||||
PR_MERGE_COMMIT_BASE_SHA:
|
||||
description: "The sha of the parent commit of the the merge commit on the target branch in the base repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_BASE_SHA }}
|
||||
PR_HEAD_COMMIT_DATE:
|
||||
description: "The date of the head sha of the pull request branch in the head repository"
|
||||
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
|
||||
@ -77,7 +74,6 @@ jobs:
|
||||
PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
|
||||
PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
|
||||
PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
|
||||
PR_MERGE_COMMIT_BASE_SHA: ${{ steps.pr_info.outputs.merge_commit_base_sha }}
|
||||
PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
|
||||
PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
|
||||
PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
|
||||
@ -126,7 +122,6 @@ jobs:
|
||||
core.setOutput('base_ref', pr.base.ref);
|
||||
core.setOutput('head_sha', pr.head.sha);
|
||||
core.setOutput('base_sha', pr.base.sha);
|
||||
core.setOutput('merge_commit_base_sha', merge_commit.parents[0].sha);
|
||||
core.setOutput('merge_commit_sha', pr.merge_commit_sha);
|
||||
core.setOutput('pr', pr);
|
||||
|
||||
@ -147,21 +142,16 @@ jobs:
|
||||
date: merge_commit.commit.committer.date
|
||||
});
|
||||
|
||||
console.log('PR Info:', {
|
||||
pr_info: pr
|
||||
});
|
||||
|
||||
- name: Convert dates to timestamps
|
||||
id: get_timestamps
|
||||
env:
|
||||
head_commit_date: ${{ steps.pr_info.outputs.head_commit_date }}
|
||||
merge_commit_date: ${{ steps.pr_info.outputs.merge_commit_date }}
|
||||
run: |
|
||||
echo "$head_commit_date"
|
||||
echo "$merge_commit_date"
|
||||
head_commit_date=${{ steps.pr_info.outputs.head_commit_date }}
|
||||
merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }}
|
||||
echo $head_commit_date
|
||||
echo $merge_commit_date
|
||||
head_commit_timestamp=$(date -d "$head_commit_date" +%s)
|
||||
merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
|
||||
echo "$head_commit_timestamp"
|
||||
echo "$merge_commit_timestamp"
|
||||
echo $head_commit_timestamp
|
||||
echo $merge_commit_timestamp
|
||||
echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
|
||||
echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
|
||||
echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
|
||||
|
||||
22
.github/workflows/get-pr-number.yml
vendored
22
.github/workflows/get-pr-number.yml
vendored
@ -15,19 +15,13 @@ jobs:
|
||||
steps:
|
||||
- name: Get PR number
|
||||
shell: bash
|
||||
env:
|
||||
issue_number: ${{ github.event.issue.number }}
|
||||
is_pull_request_issue: ${{ github.event.issue.pull_request != null }}
|
||||
pr_number: ${{ github.event.pull_request.number }}
|
||||
is_pull_request: ${{ github.event.pull_request != null }}
|
||||
event_number: ${{ github.event.number }}
|
||||
run: |
|
||||
if [[ "$issue_number" != "" && "$is_pull_request_issue" == "true" ]]; then
|
||||
echo "PR_NUMBER=$issue_number" >> $GITHUB_ENV
|
||||
elif [[ "$pr_number" != "" ]]; then
|
||||
echo "PR_NUMBER=$pr_number" >> $GITHUB_ENV
|
||||
elif [[ "$is_pull_request" == "true" ]]; then
|
||||
echo "PR_NUMBER=$event_number" >> $GITHUB_ENV
|
||||
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event.pull_request }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "PR_NUMBER=" >> $GITHUB_ENV
|
||||
fi
|
||||
@ -35,8 +29,8 @@ jobs:
|
||||
- name: Check PR number
|
||||
shell: bash
|
||||
run: |
|
||||
echo "$PR_NUMBER"
|
||||
echo "${{ env.PR_NUMBER }}"
|
||||
|
||||
- name: Set PR number
|
||||
id: set_pr_number
|
||||
run: echo "PR_NUMBER=$PR_NUMBER" >> "$GITHUB_OUTPUT"
|
||||
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
63
.github/workflows/model_jobs.yml
vendored
63
.github/workflows/model_jobs.yml
vendored
@ -28,9 +28,6 @@ on:
|
||||
report_repo_id:
|
||||
required: false
|
||||
type: string
|
||||
pytest_marker:
|
||||
required: false
|
||||
type: string
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@ -62,33 +59,25 @@ jobs:
|
||||
steps:
|
||||
- name: Echo input and matrix info
|
||||
shell: bash
|
||||
env:
|
||||
folder_slices: ${{ inputs.folder_slices }}
|
||||
matrix_folders: ${{ matrix.folders }}
|
||||
slice_data: ${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}
|
||||
run: |
|
||||
echo "$folder_slices"
|
||||
echo "$matrix_folders"
|
||||
echo "$slice_data"
|
||||
echo "${{ inputs.folder_slices }}"
|
||||
echo "${{ matrix.folders }}"
|
||||
echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
|
||||
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||
# set the artifact folder names (because the character `/` is not allowed).
|
||||
env:
|
||||
matrix_folders_raw: ${{ matrix.folders }}
|
||||
run: |
|
||||
echo "$matrix_folders_raw"
|
||||
matrix_folders="${matrix_folders_raw/'models/'/'models_'}"
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: |
|
||||
git fetch origin "$commit_sha" && git checkout "$commit_sha"
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@ -123,17 +112,15 @@ jobs:
|
||||
id: set_machine_type
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
env:
|
||||
input_machine_type: ${{ inputs.machine_type }}
|
||||
run: |
|
||||
echo "$input_machine_type"
|
||||
echo "${{ inputs.machine_type }}"
|
||||
|
||||
if [ "$input_machine_type" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ inputs.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "$input_machine_type" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ inputs.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type="$input_machine_type"
|
||||
machine_type=${{ inputs.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
@ -142,21 +129,15 @@ jobs:
|
||||
|
||||
- name: Create report directory if it doesn't exist
|
||||
shell: bash
|
||||
env:
|
||||
report_name_prefix: ${{ inputs.report_name_prefix }}
|
||||
run: |
|
||||
mkdir -p "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports"
|
||||
echo "dummy" > "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports/dummy.txt"
|
||||
ls -la "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports"
|
||||
mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
|
||||
echo "dummy" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/dummy.txt
|
||||
ls -la /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
|
||||
|
||||
- name: Run all tests on GPU
|
||||
working-directory: /transformers
|
||||
env:
|
||||
report_name_prefix: ${{ inputs.report_name_prefix }}
|
||||
pytest_marker: ${{ inputs.pytest_marker }}
|
||||
model: ${{ matrix.folders }}
|
||||
run: |
|
||||
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports python3 -m pytest -rsfE -v -m '${pytest_marker}' --make-reports=${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports tests/${model}" test_outputs.txt
|
||||
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
|
||||
ls -la
|
||||
# Extract the exit code from the output file
|
||||
EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
|
||||
@ -167,25 +148,19 @@ jobs:
|
||||
# This step is only to show information on Github Actions log.
|
||||
# Always mark this step as successful, even if the report directory or the file `failures_short.txt` in it doesn't exist
|
||||
continue-on-error: true
|
||||
env:
|
||||
report_name_prefix: ${{ inputs.report_name_prefix }}
|
||||
run: cat "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports/failures_short.txt"
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: Captured information
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
env:
|
||||
report_name_prefix: ${{ inputs.report_name_prefix }}
|
||||
run: |
|
||||
cat "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports/captured_info.txt"
|
||||
cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/captured_info.txt
|
||||
|
||||
- name: Copy test_outputs.txt
|
||||
if: ${{ always() }}
|
||||
continue-on-error: true
|
||||
env:
|
||||
report_name_prefix: ${{ inputs.report_name_prefix }}
|
||||
run: |
|
||||
cp /transformers/test_outputs.txt "/transformers/reports/${machine_type}_${report_name_prefix}_${matrix_folders}_test_reports"
|
||||
cp /transformers/test_outputs.txt /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
@ -196,7 +171,7 @@ jobs:
|
||||
|
||||
collated_reports:
|
||||
name: Collated Reports
|
||||
if: ${{ always() && inputs.runner_type != '' }}
|
||||
if: ${{ always() }}
|
||||
needs: run_models_gpu
|
||||
uses: huggingface/transformers/.github/workflows/collated-reports.yml@main
|
||||
with:
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
name: PR slow CI - Suggestion
|
||||
name: PR slow CI
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
@ -23,28 +23,11 @@ jobs:
|
||||
outputs:
|
||||
jobs: ${{ steps.get_jobs.outputs.jobs_to_run }}
|
||||
steps:
|
||||
# This checkout to the main branch
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
|
||||
# We need to use `${{ ... }}` here to avoid `Argument list too long` error when a PR changes a lot of files.
|
||||
# (We could also try to use artifact approach, but it's more involved).
|
||||
# `CodeQL` doesn't identify any security issue here. Also `PR_FILES` is from `get-pr-info.yml` by using an api
|
||||
# `github.rest.pulls.listFiles`, which is fine.
|
||||
- name: Write pr_files file
|
||||
run: |
|
||||
cat > pr_files.txt << 'EOF'
|
||||
${{ needs.get-pr-info.outputs.PR_FILES }}
|
||||
EOF
|
||||
|
||||
- name: Get repository content
|
||||
id: repo_content
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const fs = require('node:fs');
|
||||
|
||||
const { data: tests_dir } = await github.rest.repos.getContent({
|
||||
owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
|
||||
repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
|
||||
@ -66,10 +49,38 @@ jobs:
|
||||
ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
|
||||
});
|
||||
|
||||
// Write to files instead of outputs
|
||||
fs.writeFileSync('tests_dir.txt', JSON.stringify(tests_dir, null, 2));
|
||||
fs.writeFileSync('tests_models_dir.txt', JSON.stringify(tests_models_dir, null, 2));
|
||||
fs.writeFileSync('tests_quantization_dir.txt', JSON.stringify(tests_quantization_dir, null, 2));
|
||||
core.setOutput('tests_dir', tests_dir);
|
||||
core.setOutput('tests_models_dir', tests_models_dir);
|
||||
core.setOutput('tests_quantization_dir', tests_quantization_dir);
|
||||
|
||||
# This checkout to the main branch
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
|
||||
- name: Write pr_files file
|
||||
run: |
|
||||
cat > pr_files.txt << 'EOF'
|
||||
${{ needs.get-pr-info.outputs.PR_FILES }}
|
||||
EOF
|
||||
|
||||
- name: Write tests_dir file
|
||||
run: |
|
||||
cat > tests_dir.txt << 'EOF'
|
||||
${{ steps.repo_content.outputs.tests_dir }}
|
||||
EOF
|
||||
|
||||
- name: Write tests_models_dir file
|
||||
run: |
|
||||
cat > tests_models_dir.txt << 'EOF'
|
||||
${{ steps.repo_content.outputs.tests_models_dir }}
|
||||
EOF
|
||||
|
||||
- name: Write tests_quantization_dir file
|
||||
run: |
|
||||
cat > tests_quantization_dir.txt << 'EOF'
|
||||
${{ steps.repo_content.outputs.tests_quantization_dir }}
|
||||
EOF
|
||||
|
||||
- name: Run script to get jobs to run
|
||||
id: get_jobs
|
||||
4
.github/workflows/push-important-models.yml
vendored
4
.github/workflows/push-important-models.yml
vendored
@ -149,9 +149,9 @@ jobs:
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-push"
|
||||
docker: huggingface/transformers-all-latest-gpu:flash-attn
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: push
|
||||
report_repo_id: hf-internal-testing/transformers_ci_push
|
||||
commit_sha: ${{ github.sha }}
|
||||
subdirs: ${{ needs.get_modified_models.outputs.matrix }}
|
||||
models: ${{ needs.get_modified_models.outputs.matrix }}
|
||||
secrets: inherit
|
||||
|
||||
505
.github/workflows/self-comment-ci.yml
vendored
505
.github/workflows/self-comment-ci.yml
vendored
@ -23,34 +23,62 @@ env:
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
|
||||
|
||||
jobs:
|
||||
get-pr-number:
|
||||
runs-on: ubuntu-22.04
|
||||
name: Get PR number
|
||||
# For security: only allow team members to run
|
||||
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
|
||||
uses: ./.github/workflows/get-pr-number.yml
|
||||
outputs:
|
||||
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
|
||||
steps:
|
||||
- name: Get PR number
|
||||
shell: bash
|
||||
run: |
|
||||
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
|
||||
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "PR_NUMBER=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
get-pr-info:
|
||||
name: Get PR commit SHA
|
||||
- name: Check PR number
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ env.PR_NUMBER }}"
|
||||
|
||||
- name: Set PR number
|
||||
id: set_pr_number
|
||||
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
get-sha:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: get-pr-number
|
||||
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
|
||||
uses: ./.github/workflows/get-pr-info.yml
|
||||
with:
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
|
||||
check-timestamps:
|
||||
name: Check timestamps (security check)
|
||||
runs-on: ubuntu-22.04
|
||||
needs: get-pr-info
|
||||
outputs:
|
||||
PR_HEAD_SHA: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}
|
||||
PR_MERGE_SHA: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
|
||||
PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
|
||||
PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
|
||||
steps:
|
||||
- name: Verify `merge_commit` timestamp is older than the issue comment timestamp
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
|
||||
|
||||
- name: Get SHA (and verify timestamps against the issue comment date)
|
||||
id: get_sha
|
||||
env:
|
||||
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
COMMENT_DATE: ${{ github.event.comment.created_at }}
|
||||
PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
|
||||
run: |
|
||||
git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
|
||||
git checkout refs/remotes/pull/$PR_NUMBER/head
|
||||
echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
|
||||
echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
|
||||
git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
|
||||
git checkout refs/remotes/pull/$PR_NUMBER/merge
|
||||
echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
|
||||
echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
|
||||
PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
|
||||
echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
|
||||
COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
|
||||
echo "COMMENT_DATE: $COMMENT_DATE"
|
||||
echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
|
||||
@ -59,10 +87,13 @@ jobs:
|
||||
exit -1;
|
||||
fi
|
||||
|
||||
# use a python script to handle this complex logic.
|
||||
# use a python script to handle this complex logic
|
||||
# case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
|
||||
# case 2: `run-slow model_1, model_2`
|
||||
get-tests:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [get-pr-number, check-timestamps]
|
||||
needs: [get-pr-number, get-sha]
|
||||
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
|
||||
outputs:
|
||||
models: ${{ steps.models_to_run.outputs.models }}
|
||||
quantizations: ${{ steps.models_to_run.outputs.quantizations }}
|
||||
@ -70,11 +101,11 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
ref: "refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge"
|
||||
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
|
||||
|
||||
- name: Verify merge commit SHA
|
||||
env:
|
||||
VERIFIED_PR_MERGE_SHA: ${{ needs.check-timestamps.outputs.PR_MERGE_SHA }}
|
||||
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
|
||||
run: |
|
||||
PR_MERGE_SHA=$(git log -1 --format=%H)
|
||||
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
|
||||
@ -95,33 +126,11 @@ jobs:
|
||||
- name: Show models to test
|
||||
id: models_to_run
|
||||
run: |
|
||||
echo "$models"
|
||||
echo "models=$models" >> $GITHUB_OUTPUT
|
||||
echo "$quantizations"
|
||||
echo "quantizations=$quantizations" >> $GITHUB_OUTPUT
|
||||
|
||||
# Report back if we are not able to get the tests (for example, security check is failing)
|
||||
report_error_earlier:
|
||||
name: Report error earlier
|
||||
if: ${{ always() && needs.get-pr-info.result == 'success' && needs.get-tests.result != 'success' }}
|
||||
needs: [get-pr-number, get-pr-info, get-tests]
|
||||
permissions:
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Reply to the comment
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
github_repository: ${{ github.repository }}
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
run: |
|
||||
gh api \
|
||||
--method POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"repos/${github_repository}/issues/${pr_number}/comments" \
|
||||
-f body="💔 This comment contains \`run-slow\`, but unknown error occurred and [the workflow run]($GITHUB_RUN_URL) aborted!"
|
||||
echo "${{ env.models }}"
|
||||
echo "models=${{ env.models }}" >> $GITHUB_ENV
|
||||
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
|
||||
echo "${{ env.quantizations }}"
|
||||
echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
|
||||
|
||||
reply_to_comment:
|
||||
name: Reply to the comment
|
||||
@ -134,20 +143,20 @@ jobs:
|
||||
- name: Reply to the comment
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
BODY: '\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}'
|
||||
github_repository: ${{ github.repository }}
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
MODELS: ${{ needs.get-tests.outputs.models }}
|
||||
BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
|
||||
run: |
|
||||
gh api \
|
||||
--method POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"repos/${github_repository}/issues/${pr_number}/comments" \
|
||||
-f body="This comment contains \`run-slow\`, running the specified jobs: $(echo -e "$BODY")"
|
||||
repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
|
||||
-f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
|
||||
|
||||
create_run:
|
||||
name: Create run
|
||||
needs: [check-timestamps, reply_to_comment]
|
||||
if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
|
||||
needs: [get-sha, get-tests, reply_to_comment]
|
||||
permissions:
|
||||
statuses: write
|
||||
runs-on: ubuntu-22.04
|
||||
@ -159,196 +168,248 @@ jobs:
|
||||
# Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
|
||||
# See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
|
||||
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
github_repository: ${{ github.repository }}
|
||||
pr_head_sha: ${{ needs.check-timestamps.outputs.PR_HEAD_SHA }}
|
||||
run: |
|
||||
gh api \
|
||||
--method POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"repos/${github_repository}/statuses/${pr_head_sha}" \
|
||||
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
|
||||
-f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
|
||||
|
||||
model-ci:
|
||||
name: Model CI
|
||||
run_models_gpu:
|
||||
name: Run all tests for the model
|
||||
if: ${{ needs.get-tests.outputs.models != '[]' }}
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
needs: [get-pr-number, check-timestamps, get-tests, create_run]
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-pr"
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: PR Comment CI
|
||||
report_repo_id: hf-internal-testing/transformers_pr_ci
|
||||
commit_sha: ${{ needs.check-timestamps.outputs.PR_MERGE_SHA }}
|
||||
subdirs: ${{ needs.get-tests.outputs.models }}
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
secrets: inherit
|
||||
needs: [get-pr-number, get-sha, get-tests, create_run]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.get-tests.outputs.models) }}
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Echo input and matrix info
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
|
||||
quantization-ci:
|
||||
name: Quantization CI
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||
# set the artifact folder names (because the character `/` is not allowed).
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout to PR merge commit
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
|
||||
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
|
||||
git log -1 --format=%H
|
||||
|
||||
- name: Verify merge commit SHA
|
||||
env:
|
||||
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
PR_MERGE_SHA=$(git log -1 --format=%H)
|
||||
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
|
||||
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
|
||||
exit -1;
|
||||
fi
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
|
||||
echo $CUDA_VISIBLE_DEVICES
|
||||
python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: Make sure report directory exists
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
|
||||
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||
|
||||
run_quantization_torch_gpu:
|
||||
name: Run all tests for a quantization
|
||||
if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
needs: [get-pr-number, check-timestamps, get-tests, create_run]
|
||||
with:
|
||||
job: run_quantization_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-pr"
|
||||
docker: huggingface/transformers-quantization-latest-gpu
|
||||
ci_event: PR Comment CI
|
||||
report_repo_id: hf-internal-testing/transformers_pr_ci
|
||||
commit_sha: ${{ needs.check-timestamps.outputs.PR_MERGE_SHA }}
|
||||
subdirs: ${{ needs.get-tests.outputs.quantizations }}
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
secrets: inherit
|
||||
needs: [get-pr-number, get-sha, get-tests, create_run]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-quantization-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
report:
|
||||
name: Check & Report
|
||||
needs: [get-pr-number, check-timestamps, create_run, model-ci, quantization-ci]
|
||||
- name: Checkout to PR merge commit
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
|
||||
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
|
||||
git log -1 --format=%H
|
||||
|
||||
- name: Verify merge commit SHA
|
||||
env:
|
||||
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
PR_MERGE_SHA=$(git log -1 --format=%H)
|
||||
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
|
||||
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
|
||||
exit -1;
|
||||
fi
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run quantization tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: Make sure report directory exists
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports
|
||||
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports/hello.txt
|
||||
echo "${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports"
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
|
||||
|
||||
update_run_status:
|
||||
name: Update Check Run Status
|
||||
needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
|
||||
permissions:
|
||||
pull-requests: write
|
||||
statuses: write
|
||||
if: ${{ always() && needs.create_run.result == 'success' }}
|
||||
runs-on: ubuntu-22.04
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
|
||||
steps:
|
||||
- name: Show reports from jobs
|
||||
env:
|
||||
MODEL_REPORT: ${{ needs.model-ci.outputs.report }}
|
||||
QUANT_REPORT: ${{ needs.quantization-ci.outputs.report }}
|
||||
- name: Get `run_models_gpu` job status
|
||||
run: |
|
||||
echo "$MODEL_REPORT"
|
||||
echo "$QUANT_REPORT"
|
||||
|
||||
- name: Process and filter reports
|
||||
env:
|
||||
MODEL_REPORT: ${{ needs.model-ci.outputs.report }}
|
||||
QUANT_REPORT: ${{ needs.quantization-ci.outputs.report }}
|
||||
run: |
|
||||
# Preprocess with Python
|
||||
python3 << 'PYTHON_SCRIPT'
|
||||
import json
|
||||
import os
|
||||
|
||||
def filter_and_format_report(data):
|
||||
"""
|
||||
Filter out entries where commit is `None` (failing tests who status is not certain) and format as text
|
||||
"""
|
||||
lines = []
|
||||
|
||||
for model, model_result in data.items():
|
||||
model_lines = []
|
||||
for device, failures in model_result.items():
|
||||
|
||||
# Filter out None commits and extract just the test names
|
||||
test_names = [
|
||||
failure['test']
|
||||
for failure in failures
|
||||
if isinstance(failure, dict) and failure.get('commit') is not None
|
||||
]
|
||||
|
||||
# Add tests to model lines
|
||||
for idx, test_name in enumerate(test_names):
|
||||
if idx == 0:
|
||||
job_link = failures[idx]['job_link']
|
||||
model_lines.append(f"- [{model}]({job_link}):")
|
||||
|
||||
model_lines.append(f" {test_name}")
|
||||
|
||||
# Only add model section if it has tests
|
||||
if len(model_lines) > 0:
|
||||
lines.extend(model_lines)
|
||||
lines.append("") # Empty line between models
|
||||
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
# Load and filter reports
|
||||
model_report_str = os.environ.get('MODEL_REPORT', '{}')
|
||||
quant_report_str = os.environ.get('QUANT_REPORT', '{}')
|
||||
|
||||
model_report = json.loads(model_report_str) if model_report_str else {}
|
||||
quant_report = json.loads(quant_report_str) if quant_report_str else {}
|
||||
|
||||
formatted_model = filter_and_format_report(model_report)
|
||||
formatted_quant = filter_and_format_report(quant_report)
|
||||
|
||||
# Write to files
|
||||
with open('model_ci.txt', 'w') as f:
|
||||
f.write(formatted_model)
|
||||
if formatted_model:
|
||||
f.write('\n')
|
||||
|
||||
with open('quantization_ci.txt', 'w') as f:
|
||||
f.write(formatted_quant)
|
||||
if formatted_quant:
|
||||
f.write('\n')
|
||||
PYTHON_SCRIPT
|
||||
|
||||
- name: Post results as PR comment
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
github_repository: ${{ github.repository }}
|
||||
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
|
||||
model_ci_result: ${{ needs.model-ci.result }}
|
||||
quantization_ci_result: ${{ needs.quantization-ci.result }}
|
||||
run: |
|
||||
{
|
||||
echo '## CI Results'
|
||||
echo "[Workflow Run ⚙️]($GITHUB_RUN_URL)"
|
||||
echo ''
|
||||
|
||||
# Check if both jobs were skipped or cancelled
|
||||
if [[ "$model_ci_result" == "skipped" || "$model_ci_result" == "cancelled" ]] && \
|
||||
[[ "$quantization_ci_result" == "skipped" || "$quantization_ci_result" == "cancelled" ]]; then
|
||||
echo '⚠️ No test being reported (jobs are skipped or cancelled)!'
|
||||
echo "STATUS=error" >> $GITHUB_ENV
|
||||
|
||||
# Check if either file has content
|
||||
elif [ -s model_ci.txt ] || [ -s quantization_ci.txt ]; then
|
||||
echo "STATUS=failure" >> $GITHUB_ENV
|
||||
|
||||
# Check if model_ci.txt has content
|
||||
if [ -s model_ci.txt ]; then
|
||||
echo '### Model CI Report'
|
||||
echo ''
|
||||
echo '#### ❌ Failed tests'
|
||||
echo ''
|
||||
cat model_ci.txt
|
||||
echo ''
|
||||
fi
|
||||
|
||||
# Check if quantization_ci.txt has content
|
||||
if [ -s quantization_ci.txt ]; then
|
||||
echo '### Quantization CI Report'
|
||||
echo ''
|
||||
echo '#### ❌ Failed tests'
|
||||
echo ''
|
||||
cat quantization_ci.txt
|
||||
echo ''
|
||||
fi
|
||||
else
|
||||
echo "STATUS=success" >> $GITHUB_ENV
|
||||
echo '✅ No failing test specific to this PR 🎉 !'
|
||||
fi
|
||||
} > comment_body.txt
|
||||
|
||||
gh api \
|
||||
--method POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"repos/${github_repository}/issues/${pr_number}/comments" \
|
||||
-F body=@comment_body.txt
|
||||
echo "${{ needs.run_models_gpu.result }}"
|
||||
echo "${{ needs.run_quantization_torch_gpu.result }}"
|
||||
echo $STATUS_OK
|
||||
if [ "$STATUS_OK" = "true" ]; then
|
||||
echo "STATUS=success" >> $GITHUB_ENV
|
||||
else
|
||||
echo "STATUS=failure" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Update PR commit statuses
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
github_repository: ${{ github.repository }}
|
||||
pr_head_sha: ${{ needs.check-timestamps.outputs.PR_HEAD_SHA }}
|
||||
# The env. variable `STATUS` used here is set in the previous step
|
||||
run: |
|
||||
echo "${{ needs.run_models_gpu.result }}"
|
||||
echo "${{ env.STATUS }}"
|
||||
gh api \
|
||||
--method POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
"repos/${github_repository}/statuses/${pr_head_sha}" \
|
||||
-f "target_url=$GITHUB_RUN_URL" -f "state=$STATUS" -f "description=Slow CI job" -f "context=pytest/custom-tests"
|
||||
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
|
||||
-f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"
|
||||
|
||||
1
.github/workflows/self-nightly-caller.yml
vendored
1
.github/workflows/self-nightly-caller.yml
vendored
@ -51,7 +51,6 @@ jobs:
|
||||
slack_report_channel: "#transformers-ci-past-future"
|
||||
docker: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
ci_event: Nightly CI
|
||||
runner_type: "a10"
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci_with_torch_nightly
|
||||
commit_sha: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
25
.github/workflows/self-push-amd-mi210-caller.yml
vendored
Normal file
25
.github/workflows/self-push-amd-mi210-caller.yml
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
name: Self-hosted runner (AMD mi210 CI caller)
|
||||
|
||||
on:
|
||||
#workflow_run:
|
||||
# workflows: ["Self-hosted runner (push-caller)"]
|
||||
# branches: ["main"]
|
||||
# types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- run_amd_push_ci_caller*
|
||||
paths:
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- ".github/**"
|
||||
- "templates/**"
|
||||
- "utils/**"
|
||||
|
||||
jobs:
|
||||
run_amd_ci:
|
||||
name: AMD mi210
|
||||
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
|
||||
uses: ./.github/workflows/self-push-amd.yml
|
||||
with:
|
||||
gpu_flavor: mi210
|
||||
secrets: inherit
|
||||
25
.github/workflows/self-push-amd-mi250-caller.yml
vendored
Normal file
25
.github/workflows/self-push-amd-mi250-caller.yml
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
name: Self-hosted runner (AMD mi250 CI caller)
|
||||
|
||||
on:
|
||||
#workflow_run:
|
||||
# workflows: ["Self-hosted runner (push-caller)"]
|
||||
# branches: ["main"]
|
||||
# types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- run_amd_push_ci_caller*
|
||||
paths:
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- ".github/**"
|
||||
- "templates/**"
|
||||
- "utils/**"
|
||||
|
||||
jobs:
|
||||
run_amd_ci:
|
||||
name: AMD mi250
|
||||
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
|
||||
uses: ./.github/workflows/self-push-amd.yml
|
||||
with:
|
||||
gpu_flavor: mi250
|
||||
secrets: inherit
|
||||
334
.github/workflows/self-push-amd.yml
vendored
Normal file
334
.github/workflows/self-push-amd.yml
vendored
Normal file
@ -0,0 +1,334 @@
|
||||
name: Self-hosted runner AMD GPU (push)
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
gpu_flavor:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
PYTEST_TIMEOUT: 60
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
|
||||
jobs:
|
||||
check_runner_status:
|
||||
name: Check Runner Status
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Check Runner Status
|
||||
run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
|
||||
check_runners:
|
||||
name: Check Runners
|
||||
needs: check_runner_status
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
||||
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: ROCM-SMI
|
||||
run: |
|
||||
rocm-smi
|
||||
- name: ROCM-INFO
|
||||
run: |
|
||||
rocminfo | grep "Agent" -A 14
|
||||
- name: Show ROCR environment
|
||||
run: |
|
||||
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
||||
|
||||
setup_gpu:
|
||||
name: Setup
|
||||
needs: check_runners
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
||||
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
test_map: ${{ steps.set-matrix.outputs.test_map }}
|
||||
env:
|
||||
# `CI_BRANCH_PUSH`: The branch name from the push event
|
||||
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
|
||||
# `CI_SHA_PUSH`: The commit SHA from the push event
|
||||
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
|
||||
# `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Cleanup
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
rm -rf tests/__pycache__
|
||||
rm -rf tests/models/__pycache__
|
||||
rm -rf reports
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Fetch the tests to run
|
||||
working-directory: /transformers
|
||||
# TODO: add `git-python` in the docker images
|
||||
run: |
|
||||
pip install --upgrade git-python
|
||||
python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||
|
||||
- name: Report fetched tests
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test_fetched
|
||||
path: /transformers/test_preparation.txt
|
||||
|
||||
- id: set-matrix
|
||||
name: Organize tests into models
|
||||
working-directory: /transformers
|
||||
# The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
|
||||
# The `test_map` is used to get the actual identified test files under each key.
|
||||
# If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
|
||||
run: |
|
||||
if [ -f test_map.json ]; then
|
||||
keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
|
||||
test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
|
||||
else
|
||||
keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
|
||||
test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
|
||||
fi
|
||||
echo $keys
|
||||
echo $test_map
|
||||
echo "matrix=$keys" >> $GITHUB_OUTPUT
|
||||
echo "test_map=$test_map" >> $GITHUB_OUTPUT
|
||||
|
||||
run_models_gpu:
|
||||
name: Model tests
|
||||
needs: setup_gpu
|
||||
# `dummy` means there is no test to run
|
||||
if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
|
||||
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||
# set the artifact folder names (because the character `/` is not allowed).
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: ROCM-SMI
|
||||
run: |
|
||||
rocm-smi
|
||||
- name: ROCM-INFO
|
||||
run: |
|
||||
rocminfo | grep "Agent" -A 14
|
||||
- name: Show ROCR environment
|
||||
run: |
|
||||
echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all non-slow selected tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [
|
||||
check_runner_status,
|
||||
check_runners,
|
||||
setup_gpu,
|
||||
run_models_gpu,
|
||||
# run_tests_torch_cuda_extensions_single_gpu,
|
||||
# run_tests_torch_cuda_extensions_multi_gpu
|
||||
]
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
echo "Runner availability: ${{ needs.check_runner_status.result }}"
|
||||
echo "Setup status: ${{ needs.setup_gpu.result }}"
|
||||
echo "Runner status: ${{ needs.check_runners.result }}"
|
||||
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
# To avoid failure when multiple commits are merged into `main` in a short period of time.
|
||||
# Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
|
||||
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
|
||||
with:
|
||||
fetch-depth: 20
|
||||
|
||||
- name: Update clone using environment variables
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
|
||||
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
|
||||
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
|
||||
CI_SHA: ${{ env.CI_SHA }}
|
||||
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
|
||||
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
|
||||
SETUP_STATUS: ${{ needs.setup_gpu.result }}
|
||||
|
||||
# We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
pip install huggingface_hub
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"
|
||||
54
.github/workflows/self-push-caller.yml
vendored
Normal file
54
.github/workflows/self-push-caller.yml
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
# Used to trigger self-push CI
|
||||
name: Self-hosted runner (push-caller)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- ".github/**"
|
||||
- "templates/**"
|
||||
- "utils/**"
|
||||
|
||||
jobs:
|
||||
check-for-setup:
|
||||
runs-on: ubuntu-22.04
|
||||
name: Check if setup was changed
|
||||
outputs:
|
||||
changed: ${{ steps.was_changed.outputs.changed }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: "2"
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
|
||||
|
||||
- name: Was setup changed
|
||||
id: was_changed
|
||||
run: |
|
||||
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
|
||||
if [ `basename "${file}"` = "setup.py" ]; then
|
||||
echo "changed=1" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
done
|
||||
|
||||
build-docker-containers:
|
||||
needs: check-for-setup
|
||||
if: (github.event_name == 'push') && (needs.check-for-setup.outputs.changed == '1')
|
||||
uses: ./.github/workflows/build-docker-images.yml
|
||||
with:
|
||||
image_postfix: "-push-ci"
|
||||
secrets: inherit
|
||||
|
||||
run_push_ci:
|
||||
name: Trigger Push CI
|
||||
runs-on: ubuntu-22.04
|
||||
if: ${{ always() }}
|
||||
needs: build-docker-containers
|
||||
steps:
|
||||
- name: Trigger push CI via workflow_run
|
||||
run: echo "Trigger push CI via workflow_run"
|
||||
652
.github/workflows/self-push.yml
vendored
Normal file
652
.github/workflows/self-push.yml
vendored
Normal file
@ -0,0 +1,652 @@
|
||||
name: Self-hosted runner (push)
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ["Self-hosted runner (push-caller)"]
|
||||
branches: ["main"]
|
||||
types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- ci_*
|
||||
- ci-*
|
||||
paths:
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- ".github/**"
|
||||
- "templates/**"
|
||||
- "utils/**"
|
||||
repository_dispatch:
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
PYTEST_TIMEOUT: 60
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu-push-ci
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
test_map: ${{ steps.set-matrix.outputs.test_map }}
|
||||
env:
|
||||
# `CI_BRANCH_PUSH`: The branch name from the push event
|
||||
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
|
||||
# `CI_SHA_PUSH`: The commit SHA from the push event
|
||||
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
|
||||
# `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Cleanup
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
rm -rf tests/__pycache__
|
||||
rm -rf tests/models/__pycache__
|
||||
rm -rf reports
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Fetch the tests to run
|
||||
working-directory: /transformers
|
||||
# TODO: add `git-python` in the docker images
|
||||
run: |
|
||||
pip install --upgrade git-python
|
||||
python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||
|
||||
- name: Report fetched tests
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test_fetched
|
||||
path: /transformers/test_preparation.txt
|
||||
|
||||
- id: set-matrix
|
||||
name: Organize tests into models
|
||||
working-directory: /transformers
|
||||
# The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
|
||||
# The `test_map` is used to get the actual identified test files under each key.
|
||||
# If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
|
||||
run: |
|
||||
if [ -f test_map.json ]; then
|
||||
keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
|
||||
test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
|
||||
else
|
||||
keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
|
||||
test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
|
||||
fi
|
||||
echo $keys
|
||||
echo $test_map
|
||||
echo "matrix=$keys" >> $GITHUB_OUTPUT
|
||||
echo "test_map=$test_map" >> $GITHUB_OUTPUT
|
||||
|
||||
run_tests_single_gpu:
|
||||
name: Model tests
|
||||
needs: setup
|
||||
# `dummy` means there is no test to run
|
||||
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
machine_type: [aws-g5-4xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu-push-ci
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||
# set the artifact folder names (because the character `/` is not allowed).
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all non-slow selected tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
||||
run_tests_multi_gpu:
|
||||
name: Model tests
|
||||
needs: setup
|
||||
# `dummy` means there is no test to run
|
||||
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
machine_type: [aws-g5-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu-push-ci
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||
# set the artifact folder names (because the character `/` is not allowed).
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all non-slow selected tests on GPU
|
||||
env:
|
||||
MKL_SERVICE_FORCE_INTEL: 1
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
||||
run_tests_torch_cuda_extensions_single_gpu:
|
||||
name: Torch CUDA extension tests
|
||||
needs: setup
|
||||
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /workspace/transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /workspace/transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Remove cached torch extensions
|
||||
run: rm -rf /github/home/.cache/torch_extensions/
|
||||
|
||||
# To avoid unknown test failures
|
||||
- name: Pre build DeepSpeed *again*
|
||||
working-directory: /workspace
|
||||
run: |
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /workspace/transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all non-slow selected tests on GPU
|
||||
working-directory: /workspace/transformers
|
||||
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||
run: |
|
||||
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||
path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||
|
||||
run_tests_torch_cuda_extensions_multi_gpu:
|
||||
name: Torch CUDA extension tests
|
||||
needs: setup
|
||||
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-12xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /workspace/transformers
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone using environment variables
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /workspace/transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
|
||||
|
||||
- name: Remove cached torch extensions
|
||||
run: rm -rf /github/home/.cache/torch_extensions/
|
||||
|
||||
# To avoid unknown test failures
|
||||
- name: Pre build DeepSpeed *again*
|
||||
working-directory: /workspace
|
||||
run: |
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /workspace/transformers
|
||||
run: |
|
||||
python utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /workspace/transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all non-slow selected tests on GPU
|
||||
working-directory: /workspace/transformers
|
||||
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||
run: |
|
||||
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||
path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
needs: [
|
||||
setup,
|
||||
run_tests_single_gpu,
|
||||
run_tests_multi_gpu,
|
||||
run_tests_torch_cuda_extensions_single_gpu,
|
||||
run_tests_torch_cuda_extensions_multi_gpu
|
||||
]
|
||||
env:
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
CI_BRANCH_PUSH: ${{ github.event.ref }}
|
||||
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
|
||||
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
|
||||
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
echo "Setup status: ${{ needs.setup.result }}"
|
||||
|
||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||
- name: Prepare custom environment variables
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
run: |
|
||||
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
|
||||
echo $CI_BRANCH_PUSH
|
||||
echo $CI_BRANCH_WORKFLOW_RUN
|
||||
echo $CI_SHA_PUSH
|
||||
echo $CI_SHA_WORKFLOW_RUN
|
||||
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
|
||||
|
||||
- name: print environment variables
|
||||
run: |
|
||||
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
|
||||
echo "env.CI_SHA = ${{ env.CI_SHA }}"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
# To avoid failure when multiple commits are merged into `main` in a short period of time.
|
||||
# Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
|
||||
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
|
||||
with:
|
||||
fetch-depth: 20
|
||||
|
||||
- name: Update clone using environment variables
|
||||
run: |
|
||||
echo "original branch = $(git branch --show-current)"
|
||||
git fetch && git checkout ${{ env.CI_BRANCH }}
|
||||
echo "updated branch = $(git branch --show-current)"
|
||||
git checkout ${{ env.CI_SHA }}
|
||||
echo "log = $(git log -n 1)"
|
||||
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Send message to Slack
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: push
|
||||
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
|
||||
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
|
||||
CI_SHA: ${{ env.CI_SHA }}
|
||||
SETUP_STATUS: ${{ needs.setup.result }}
|
||||
|
||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
pip install huggingface_hub
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
||||
@ -2,7 +2,7 @@ name: Self-hosted runner (AMD scheduled CI caller)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "17 5 * * *"
|
||||
- cron: "17 2 * * *"
|
||||
|
||||
jobs:
|
||||
run_scheduled_amd_ci:
|
||||
|
||||
@ -21,7 +21,7 @@ jobs:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_group: hfc-amd-mi355
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
docker: huggingface/testing-rocm7.0-preview
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
report_repo_id: hf-transformers-bot/transformers-ci-dummy
|
||||
secrets: inherit
|
||||
@ -33,7 +33,7 @@ jobs:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_group: hfc-amd-mi355
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
docker: huggingface/testing-rocm7.0-preview
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
report_repo_id: hf-transformers-bot/transformers-ci-dummy
|
||||
secrets: inherit
|
||||
@ -45,7 +45,7 @@ jobs:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_group: hfc-amd-mi355
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
docker: huggingface/testing-rocm7.0-preview
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
report_repo_id: hf-transformers-bot/transformers-ci-dummy
|
||||
secrets: inherit
|
||||
|
||||
21
.github/workflows/self-scheduled-caller.yml
vendored
21
.github/workflows/self-scheduled-caller.yml
vendored
@ -33,13 +33,10 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Setup
|
||||
env:
|
||||
prev_workflow_run_id: ${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}
|
||||
other_workflow_run_id: ${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}
|
||||
run: |
|
||||
mkdir "setup_values"
|
||||
echo "$prev_workflow_run_id" > "setup_values/prev_workflow_run_id.txt"
|
||||
echo "$other_workflow_run_id" > "setup_values/other_workflow_run_id.txt"
|
||||
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
|
||||
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
@ -66,7 +63,7 @@ jobs:
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
docker: huggingface/transformers-pytorch-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
@ -121,15 +118,3 @@ jobs:
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
kernels-ci:
|
||||
name: Kernels CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_kernels_gpu
|
||||
slack_report_channel: "#transformers-ci-daily-kernels"
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
@ -1,60 +0,0 @@
|
||||
name: Nvidia CI - Flash Attn
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "17 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_nvidia_ci_flash_attn*
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
prev_workflow_run_id:
|
||||
description: 'previous workflow run id to compare'
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
other_workflow_run_id:
|
||||
description: 'other workflow run id to compare'
|
||||
type: string
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
|
||||
# Used for `push` to easily modify the target workflow runs to compare against
|
||||
env:
|
||||
prev_workflow_run_id: ""
|
||||
other_workflow_run_id: ""
|
||||
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
mkdir "setup_values"
|
||||
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
|
||||
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: setup_values
|
||||
path: setup_values
|
||||
|
||||
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-flash-attn"
|
||||
docker: huggingface/transformers-all-latest-gpu:flash-attn
|
||||
ci_event: Daily CI
|
||||
runner_type: "a10"
|
||||
report_repo_id: hf-internal-testing/transformers_flash_attn_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
pytest_marker: "flash_attn_test or flash_attn_3_test"
|
||||
secrets: inherit
|
||||
220
.github/workflows/self-scheduled.yml
vendored
220
.github/workflows/self-scheduled.yml
vendored
@ -34,20 +34,10 @@ on:
|
||||
runner_type:
|
||||
required: false
|
||||
type: string
|
||||
subdirs:
|
||||
models:
|
||||
default: ""
|
||||
required: false
|
||||
type: string
|
||||
pytest_marker:
|
||||
required: false
|
||||
type: string
|
||||
pr_number:
|
||||
required: false
|
||||
type: string
|
||||
outputs:
|
||||
report:
|
||||
description: "Content of the report of new failures"
|
||||
value: ${{ jobs.check_new_failures.outputs.report }}
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@ -60,6 +50,7 @@ env:
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
CUDA_VISIBLE_DEVICES: 0,1
|
||||
NUM_SLICES: 2
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
@ -80,11 +71,8 @@ jobs:
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: |
|
||||
git fetch origin $commit_sha
|
||||
git fetch && git checkout $commit_sha
|
||||
git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Cleanup
|
||||
working-directory: /transformers
|
||||
@ -101,17 +89,11 @@ jobs:
|
||||
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
|
||||
name: Identify models to test
|
||||
working-directory: /transformers/tests
|
||||
env:
|
||||
job: ${{ inputs.job }}
|
||||
subdirs: ${{ inputs.subdirs }}
|
||||
NUM_SLICES: 2
|
||||
run: |
|
||||
if [ "$job" = "run_models_gpu" ]; then
|
||||
python3 ../utils/split_model_tests.py --subdirs "$subdirs" --num_splits "$NUM_SLICES" > folder_slices.txt
|
||||
echo "folder_slices=$(cat folder_slices.txt)" >> $GITHUB_OUTPUT
|
||||
python3 -c "import ast; folder_slices = ast.literal_eval(open('folder_slices.txt').read()); open('slice_ids.txt', 'w').write(str(list(range(len(folder_slices)))))"
|
||||
echo "slice_ids=$(cat slice_ids.txt)" >> $GITHUB_OUTPUT
|
||||
elif [ "$job" = "run_trainer_and_fsdp_gpu" ]; then
|
||||
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --models '${{ inputs.models }}' --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
|
||||
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
@ -120,10 +102,8 @@ jobs:
|
||||
if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
|
||||
name: Identify quantization method to test
|
||||
working-directory: /transformers/tests
|
||||
env:
|
||||
subdirs: ${{ inputs.subdirs || 'None' }}
|
||||
run: |
|
||||
echo "quantization_matrix=$(python3 -c 'import ast; import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); subdirs = ast.literal_eval(os.environ["subdirs"]); quantization_tests = [x.removeprefix("quantization/") for x in subdirs] if subdirs is not None else quantization_tests; d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))); print(d)')" >> $GITHUB_OUTPUT
|
||||
echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
@ -147,7 +127,6 @@ jobs:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
runner_type: ${{ inputs.runner_type }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
pytest_marker: ${{ inputs.pytest_marker }}
|
||||
secrets: inherit
|
||||
|
||||
run_trainer_and_fsdp_gpu:
|
||||
@ -181,14 +160,12 @@ jobs:
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
image: huggingface/transformers-pytorch-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout "$commit_sha"
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@ -210,17 +187,15 @@ jobs:
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
env:
|
||||
matrix_machine_type: ${{ matrix.machine_type }}
|
||||
run: |
|
||||
echo "$matrix_machine_type"
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "$matrix_machine_type" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "$matrix_machine_type" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type="$matrix_machine_type"
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
@ -229,12 +204,12 @@ jobs:
|
||||
- name: Run all pipeline tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports="${machine_type}_run_pipelines_torch_gpu_test_reports" tests/pipelines
|
||||
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat "/transformers/reports/${machine_type}_run_pipelines_torch_gpu_test_reports/failures_short.txt"
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
@ -258,9 +233,7 @@ jobs:
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout "$commit_sha"
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@ -282,17 +255,15 @@ jobs:
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
env:
|
||||
matrix_machine_type: ${{ matrix.machine_type }}
|
||||
run: |
|
||||
echo "$matrix_machine_type"
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "$matrix_machine_type" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "$matrix_machine_type" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type="$matrix_machine_type"
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
@ -302,12 +273,12 @@ jobs:
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
pip install -r examples/pytorch/_tests_requirements.txt
|
||||
python3 -m pytest -v --make-reports="${machine_type}_run_examples_gpu_test_reports" examples/pytorch
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat "/transformers/reports/${machine_type}_run_examples_gpu_test_reports/failures_short.txt"
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
@ -331,9 +302,7 @@ jobs:
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout "$commit_sha"
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
@ -355,7 +324,7 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/
|
||||
run: |
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --no-build-isolation --config-settings="--build-option=build_ext" --config-settings="--build-option=-j8" --no-cache -v --disable-pip-version-check
|
||||
DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
# To avoid unknown test failures
|
||||
- name: Pre build DeepSpeed *again* (for nightly & Past CI)
|
||||
@ -365,7 +334,7 @@ jobs:
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
rm -rf DeepSpeed
|
||||
git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
|
||||
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --no-build-isolation --config-settings="--build-option=build_ext" --config-settings="--build-option=-j8" --no-cache -v --disable-pip-version-check
|
||||
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
@ -383,17 +352,15 @@ jobs:
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
shell: bash
|
||||
env:
|
||||
matrix_machine_type: ${{ matrix.machine_type }}
|
||||
run: |
|
||||
echo "$matrix_machine_type"
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "$matrix_machine_type" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "$matrix_machine_type" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type="$matrix_machine_type"
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
@ -402,14 +369,12 @@ jobs:
|
||||
- name: Run all tests on GPU
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports="${machine_type}_run_torch_cuda_extensions_gpu_test_reports" tests/deepspeed tests/extended
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
env:
|
||||
working_directory_prefix: ${{ inputs.working-directory-prefix }}
|
||||
run: cat "${working_directory_prefix}/transformers/reports/${machine_type}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt"
|
||||
run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
@ -436,19 +401,16 @@ jobs:
|
||||
steps:
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
env:
|
||||
matrix_folders_raw: ${{ matrix.folders }}
|
||||
run: |
|
||||
echo "$matrix_folders_raw"
|
||||
matrix_folders="${matrix_folders_raw/'quantization/'/'quantization_'}"
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout "$commit_sha"
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@ -470,17 +432,15 @@ jobs:
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
env:
|
||||
matrix_machine_type: ${{ matrix.machine_type }}
|
||||
run: |
|
||||
echo "$matrix_machine_type"
|
||||
echo "${{ matrix.machine_type }}"
|
||||
|
||||
if [ "$matrix_machine_type" = "aws-g5-4xlarge-cache" ]; then
|
||||
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "$matrix_machine_type" = "aws-g5-12xlarge-cache" ]; then
|
||||
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type="$matrix_machine_type"
|
||||
machine_type=${{ matrix.machine_type }}
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
@ -488,96 +448,20 @@ jobs:
|
||||
|
||||
- name: Run quantization tests on GPU
|
||||
working-directory: /transformers
|
||||
env:
|
||||
folders: ${{ matrix.folders }}
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports="${machine_type}_run_quantization_torch_gpu_${matrix_folders}_test_reports" tests/${folders}
|
||||
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat "/transformers/reports/${machine_type}_run_quantization_torch_gpu_${matrix_folders}_test_reports/failures_short.txt"
|
||||
run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
|
||||
|
||||
run_kernels_gpu:
|
||||
if: ${{ inputs.job == 'run_kernels_gpu' }}
|
||||
name: Kernel tests
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache]
|
||||
runs-on:
|
||||
group: '${{ matrix.machine_type }}'
|
||||
container:
|
||||
image: ${{ inputs.docker }}
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout "$commit_sha"
|
||||
|
||||
- name: Reinstall transformers in edit mode
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .[testing]
|
||||
|
||||
- name: Install kernels
|
||||
working-directory: /transformers
|
||||
run: python3 -m pip install -U kernels
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Set `machine_type` for report and artifact names
|
||||
working-directory: /transformers
|
||||
shell: bash
|
||||
env:
|
||||
matrix_machine_type: ${{ matrix.machine_type }}
|
||||
run: |
|
||||
echo "$matrix_machine_type"
|
||||
|
||||
if [ "$matrix_machine_type" = "aws-g5-4xlarge-cache" ]; then
|
||||
machine_type=single-gpu
|
||||
elif [ "$matrix_machine_type" = "aws-g5-12xlarge-cache" ]; then
|
||||
machine_type=multi-gpu
|
||||
else
|
||||
machine_type="$matrix_machine_type"
|
||||
fi
|
||||
|
||||
echo "$machine_type"
|
||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||
|
||||
- name: Run kernel tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports="${machine_type}_run_kernels_gpu_test_reports" tests/kernels/test_kernels.py
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat "/transformers/reports/${machine_type}_run_kernels_gpu_test_reports/failures_short.txt"
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_kernels_gpu_test_reports"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ env.machine_type }}_run_kernels_gpu_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_kernels_gpu_test_reports
|
||||
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
|
||||
|
||||
run_extract_warnings:
|
||||
# Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
|
||||
@ -586,10 +470,11 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [setup, run_models_gpu]
|
||||
steps:
|
||||
# Checkout in order to run `utils/extract_warnings.py`. Avoid **explicit** checkout (i.e. don't specify `ref`) for
|
||||
# security reason.
|
||||
- name: Checkout transformers
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
ref: ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Install transformers
|
||||
run: pip install transformers
|
||||
@ -609,12 +494,9 @@ jobs:
|
||||
working-directory: warnings_in_ci
|
||||
|
||||
- name: Extract warnings in CI artifacts
|
||||
env:
|
||||
github_run_id: ${{ github.run_id }}
|
||||
access_token: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
run: |
|
||||
python3 utils/extract_warnings.py --workflow_run_id "$github_run_id" --output_dir warnings_in_ci --token "$access_token" --from_gh
|
||||
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d); print(d)')"
|
||||
python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
|
||||
echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
|
||||
|
||||
- name: Upload artifact
|
||||
if: ${{ always() }}
|
||||
@ -633,7 +515,6 @@ jobs:
|
||||
run_examples_gpu,
|
||||
run_torch_cuda_extensions_gpu,
|
||||
run_quantization_torch_gpu,
|
||||
run_kernels_gpu,
|
||||
run_extract_warnings
|
||||
]
|
||||
if: always() && !cancelled()
|
||||
@ -653,17 +534,16 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
check_new_failures:
|
||||
if: ${{ always() && needs.send_results.result == 'success' }}
|
||||
if: ${{ always() && inputs.ci_event == 'Daily CI' && needs.send_results.result == 'success' }}
|
||||
name: Check new failures
|
||||
needs: send_results
|
||||
uses: ./.github/workflows/check_failed_tests.yml
|
||||
with:
|
||||
docker: ${{ inputs.docker }}
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
start_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
job: ${{ inputs.job }}
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
pr_number: ${{ inputs.pr_number }}
|
||||
|
||||
secrets: inherit
|
||||
|
||||
15
.github/workflows/slack-report.yml
vendored
15
.github/workflows/slack-report.yml
vendored
@ -41,16 +41,13 @@ jobs:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
# For the meaning of these environment variables, see the job `Setup`
|
||||
env:
|
||||
setup_status: ${{ inputs.setup_status }}
|
||||
run: |
|
||||
echo "Setup status: $setup_status"
|
||||
echo "Setup status: ${{ inputs.setup_status }}"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
# Security: checkout to the `main` branch for untrusted triggers (issue_comment, pull_request_target), otherwise use the specified ref
|
||||
ref: ${{ (github.event_name == 'issue_comment' || github.event_name == 'pull_request_target') && 'main' || (inputs.commit_sha || github.sha) }}
|
||||
ref: ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
@ -84,8 +81,6 @@ jobs:
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
quantization_matrix: ${{ inputs.quantization_matrix }}
|
||||
folder_slices: ${{ inputs.folder_slices }}
|
||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
# For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
|
||||
@ -94,10 +89,10 @@ jobs:
|
||||
pip install huggingface_hub
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
if [ "$quantization_matrix" != "" ]; then
|
||||
python utils/notification_service.py "$quantization_matrix"
|
||||
if [ "${{ inputs.quantization_matrix }}" != "" ]; then
|
||||
python utils/notification_service.py "${{ inputs.quantization_matrix }}"
|
||||
else
|
||||
python utils/notification_service.py "$folder_slices"
|
||||
python utils/notification_service.py "${{ inputs.folder_slices }}"
|
||||
fi
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
|
||||
35
.github/workflows/ssh-runner.yml
vendored
35
.github/workflows/ssh-runner.yml
vendored
@ -4,7 +4,7 @@ on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
runner_type:
|
||||
description: 'Type of runner to test (a10)'
|
||||
description: 'Type of runner to test (a10 or t4)'
|
||||
required: true
|
||||
docker_image:
|
||||
description: 'Name of the Docker image'
|
||||
@ -36,10 +36,14 @@ jobs:
|
||||
NUM_GPUS: ${{ github.event.inputs.num_gpus }}
|
||||
RUNNER_TYPE: ${{ github.event.inputs.runner_type }}
|
||||
run: |
|
||||
if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
|
||||
echo "RUNNER=aws-g5-4xlarge-cache-ssh" >> $GITHUB_ENV
|
||||
if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "t4" ]]; then
|
||||
echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
|
||||
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then
|
||||
echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
|
||||
elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
|
||||
echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
|
||||
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then
|
||||
echo "RUNNER=aws-g5-12xlarge-cache-ssh" >> $GITHUB_ENV
|
||||
echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
|
||||
else
|
||||
echo "RUNNER=" >> $GITHUB_ENV
|
||||
fi
|
||||
@ -47,8 +51,8 @@ jobs:
|
||||
- name: Set runner to use
|
||||
id: set_runner
|
||||
run: |
|
||||
echo "$RUNNER"
|
||||
echo "RUNNER=$RUNNER" >> $GITHUB_OUTPUT
|
||||
echo ${{ env.RUNNER }}
|
||||
echo "RUNNER=${{ env.RUNNER }}" >> $GITHUB_OUTPUT
|
||||
|
||||
ssh_runner:
|
||||
name: "SSH"
|
||||
@ -57,13 +61,13 @@ jobs:
|
||||
group: ${{ needs.get_runner.outputs.RUNNER }}
|
||||
container:
|
||||
image: ${{ github.event.inputs.docker_image }}
|
||||
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
env:
|
||||
commit_sha: ${{ github.sha }}
|
||||
run: |
|
||||
git fetch && git checkout "$commit_sha"
|
||||
git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Cleanup
|
||||
working-directory: /transformers
|
||||
@ -95,17 +99,14 @@ jobs:
|
||||
- name: Store Slack infos
|
||||
#because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step
|
||||
shell: bash
|
||||
env:
|
||||
user_slack_id: ${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}
|
||||
default_slack_channel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
|
||||
run: |
|
||||
echo "$github_actor"
|
||||
if [ "$user_slack_id" != "" ]; then
|
||||
echo "SLACKCHANNEL=$user_slack_id" >> $GITHUB_ENV
|
||||
echo "${{ env.github_actor }}"
|
||||
if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then
|
||||
echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "SLACKCHANNEL=$default_slack_channel" >> $GITHUB_ENV
|
||||
echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
|
||||
- name: Tailscale # In order to be able to SSH when a test fails
|
||||
uses: huggingface/tailscale-action@main
|
||||
with:
|
||||
|
||||
@ -14,7 +14,7 @@ This AGENTS.md file provides guidance for code agents working with this codebase
|
||||
|
||||
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
|
||||
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
|
||||
- Code style is enforced in the CI. You can install the style tools with `pip install -e ".[quality]"`. You can then run `make fixup` to apply style and consistency fixes to your code.
|
||||
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
|
||||
|
||||
## Copying and inheritance
|
||||
|
||||
@ -36,4 +36,4 @@ After making changes, you should usually run `make fixup` to ensure any copies a
|
||||
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
|
||||
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
|
||||
|
||||
In order to run tests, you may need to install dependencies. You can do this with `pip install -e ".[testing]"`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
|
||||
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
|
||||
1
Makefile
1
Makefile
@ -45,7 +45,6 @@ repo-consistency:
|
||||
python utils/check_modular_conversion.py
|
||||
python utils/check_dummies.py
|
||||
python utils/check_repo.py
|
||||
python utils/check_init_weights_data.py
|
||||
python utils/check_inits.py
|
||||
python utils/check_pipeline_typing.py
|
||||
python utils/check_config_docstrings.py
|
||||
|
||||
@ -64,8 +64,8 @@ limitations under the License.
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/transformers_as_a_model_definition.png"/>
|
||||
</h3>
|
||||
|
||||
Transformers acts as the model-definition framework for state-of-the-art machine learning with text, computer
|
||||
vision, audio, video, and multimodal models, for both inference and training.
|
||||
Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer
|
||||
vision, audio, video, and multimodal model, for both inference and training.
|
||||
|
||||
It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the
|
||||
pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training
|
||||
|
||||
@ -9,12 +9,6 @@ In this list, we showcase incredibly impactful and novel projects that have push
|
||||
adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR
|
||||
to add it.
|
||||
|
||||
## [◉ Universal Intelligence](https://github.com/blueraai/universal-intelligence)
|
||||
|
||||
[Universal Intelligence](https://github.com/blueraai/universal-intelligence) aims to standardize models, tools, and agents —transforming them into simple, composable, portable, interoperable, framework-agnostic, hardware-agnostic interfaces (through auto-negotiation and resource sharing); for fast and accessible development of AI applications.
|
||||
|
||||
Keywords: Protocol, Open-source, LLMs, Large Language Models, Agents, Low-code
|
||||
|
||||
## [gpt4all](https://github.com/nomic-ai/gpt4all)
|
||||
|
||||
[gpt4all](https://github.com/nomic-ai/gpt4all) is an ecosystem of open-source chatbots trained on massive collections of clean assistant data including code, stories and dialogue. It offers open-source, large language models such as LLaMA and GPT-J trained in an assistant-style.
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
gpustat==1.1.1
|
||||
psutil==6.0.0
|
||||
psycopg2==2.9.9
|
||||
torch>=2.4.0
|
||||
hf_xet
|
||||
pandas>=1.5.0
|
||||
pandas>=1.5.0
|
||||
@ -1,11 +1,8 @@
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from transformers.utils.import_utils import is_flash_attn_2_available
|
||||
|
||||
|
||||
KERNELIZATION_AVAILABLE = False
|
||||
try:
|
||||
@ -21,22 +18,11 @@ logger = logging.getLogger(__name__)
|
||||
class BenchmarkConfig:
|
||||
"""Configuration for a single benchmark scenario."""
|
||||
|
||||
all_attn_implementations = [
|
||||
("flash_attention_2", None),
|
||||
("eager", None),
|
||||
("sdpa", "math"),
|
||||
("sdpa", "flash_attention"),
|
||||
("flex_attention", None),
|
||||
]
|
||||
|
||||
all_compiled_modes = [None, "default", "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
warmup_iterations: int = 5,
|
||||
measurement_iterations: int = 20,
|
||||
gpu_monitoring: bool = True, # NOTE: you may want to disable this at times as we have obsvered it could heavily slow down benchmarks on AMD
|
||||
continuous_batching: bool = False,
|
||||
batch_size: int = 1,
|
||||
sequence_length: int = 128,
|
||||
num_tokens_to_generate: int = 128,
|
||||
@ -52,7 +38,6 @@ class BenchmarkConfig:
|
||||
self.warmup_iterations = warmup_iterations
|
||||
self.measurement_iterations = measurement_iterations
|
||||
self.gpu_monitoring = gpu_monitoring
|
||||
self.continuous_batching = continuous_batching
|
||||
# Input parameters
|
||||
self.batch_size = batch_size
|
||||
self.sequence_length = sequence_length
|
||||
@ -74,35 +59,12 @@ class BenchmarkConfig:
|
||||
def check_validity(self, skip_validity_check: bool = False) -> None:
|
||||
if skip_validity_check:
|
||||
return
|
||||
# Check FA is installed
|
||||
if self.attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
|
||||
logger.warning(
|
||||
"Flash attention does not support compile mode. Defaulting to SDPA w/ flash attention backend."
|
||||
)
|
||||
self.attn_implementation = "sdpa"
|
||||
self.sdpa_backend = "flash_attention"
|
||||
# Flash attention does not support compile mode, so we turn it off # FIXME: it would be better to support it
|
||||
is_fa = self.attn_implementation == "flash_attention_2"
|
||||
is_fa |= self.attn_implementation == "sdpa" and self.sdpa_backend == "flash_attention"
|
||||
if is_fa:
|
||||
logger.warning("Flash attention does not support compile mode. Turning off compile mode.")
|
||||
self.compile_mode = None
|
||||
# Handle SDPA backend if not determined by the config (needs to be done before skipping duplicates)
|
||||
if self.attn_implementation == "sdpa" and self.sdpa_backend is None:
|
||||
default_backend = "flash_attention" # FIXME: torch has a _cur_sdpa_kernel_backends but it fails
|
||||
logger.warning(f"No SDPA backend provided, using {default_backend} instead.")
|
||||
self.sdpa_backend = default_backend
|
||||
if self.continuous_batching:
|
||||
if self.attn_implementation == "flex_attention":
|
||||
logger.error(
|
||||
"disabling continuous batching because of invalid configuration: flex attention is not supported"
|
||||
)
|
||||
self.continuous_batching = False
|
||||
elif self.attn_implementation == "sdpa" and self.sdpa_backend is not None:
|
||||
logger.warning(
|
||||
"when continuous batching is enabled, sdpa_backend must be None because of the attention mask, setting it to None"
|
||||
)
|
||||
self.sdpa_backend = "math"
|
||||
|
||||
@property
|
||||
def hash(self) -> str:
|
||||
@ -118,7 +80,6 @@ class BenchmarkConfig:
|
||||
attn_code += f"_{self.sdpa_backend}" if self.attn_implementation == "sdpa" else ""
|
||||
compile_str = f"compiled_{self.compile_mode}" if self.compile_mode is not None else "uncompiled"
|
||||
kernelize_str = "kernelized" if self.kernelize else "unkernelized"
|
||||
continuous_batching_str = "cb" if self.continuous_batching else "generate"
|
||||
sep = "-"
|
||||
else:
|
||||
iter_str = f"{self.warmup_iterations} warmup, {self.measurement_iterations} iterations"
|
||||
@ -128,11 +89,8 @@ class BenchmarkConfig:
|
||||
attn_code += f" with {self.sdpa_backend} backend" if self.attn_implementation == "sdpa" else ""
|
||||
compile_str = "compiled" if self.compile_mode is not None else "not compiled"
|
||||
kernelize_str = "kernelized" if self.kernelize else "not kernelized"
|
||||
continuous_batching_str = "continuous batching" if self.continuous_batching else "regular generate"
|
||||
sep = ", "
|
||||
return sep.join(
|
||||
[iter_str, gpu_monitor_str, dimensions_str, attn_code, compile_str, kernelize_str, continuous_batching_str]
|
||||
)
|
||||
return sep.join([iter_str, gpu_monitor_str, dimensions_str, attn_code, compile_str, kernelize_str])
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
@ -140,7 +98,6 @@ class BenchmarkConfig:
|
||||
"warmup_iterations": self.warmup_iterations,
|
||||
"measurement_iterations": self.measurement_iterations,
|
||||
"gpu_monitoring": self.gpu_monitoring,
|
||||
"continuous_batching": self.continuous_batching,
|
||||
"batch_size": self.batch_size,
|
||||
"sequence_length": self.sequence_length,
|
||||
"num_tokens_to_generate": self.num_tokens_to_generate,
|
||||
@ -157,7 +114,6 @@ class BenchmarkConfig:
|
||||
warmup_iterations=data.get("warmup_iterations", 5),
|
||||
measurement_iterations=data.get("measurement_iterations", 20),
|
||||
gpu_monitoring=data.get("gpu_monitoring", False),
|
||||
continuous_batching=data.get("continuous_batching", False),
|
||||
batch_size=data.get("batch_size", 1),
|
||||
sequence_length=data.get("sequence_length", 128),
|
||||
num_tokens_to_generate=data.get("num_tokens_to_generate", 128),
|
||||
@ -171,72 +127,88 @@ class BenchmarkConfig:
|
||||
)
|
||||
|
||||
|
||||
def adapt_configs(
|
||||
configs: list[BenchmarkConfig],
|
||||
warmup_iterations: int | list[int] = 5,
|
||||
measurement_iterations: int | list[int] = 20,
|
||||
batch_size: int | list[int] = 1,
|
||||
sequence_length: int | list[int] = 128,
|
||||
num_tokens_to_generate: int | list[int] = 128,
|
||||
gpu_monitoring: bool | list[bool] = True,
|
||||
def cross_generate_configs(
|
||||
attn_impl_and_sdpa_backend: list[tuple[str, str | None]],
|
||||
compiled_mode: list[str | None],
|
||||
kernelized: list[bool],
|
||||
warmup_iterations: int = 5,
|
||||
measurement_iterations: int = 20,
|
||||
batch_size: int = 1,
|
||||
sequence_length: int = 128,
|
||||
num_tokens_to_generate: int = 128,
|
||||
gpu_monitoring: bool = True,
|
||||
) -> list[BenchmarkConfig]:
|
||||
parameters = (
|
||||
x if isinstance(x, list) else [x]
|
||||
for x in [
|
||||
warmup_iterations,
|
||||
measurement_iterations,
|
||||
batch_size,
|
||||
sequence_length,
|
||||
num_tokens_to_generate,
|
||||
gpu_monitoring,
|
||||
]
|
||||
)
|
||||
iterator = itertools.product(*parameters)
|
||||
|
||||
adapted_configs = []
|
||||
for warmup_iters, measurement_iters, bs, seqlen, ntok, monitor in iterator:
|
||||
for config in configs:
|
||||
config = config.to_dict()
|
||||
config["warmup_iterations"] = warmup_iters
|
||||
config["measurement_iterations"] = measurement_iters
|
||||
config["batch_size"] = bs
|
||||
config["sequence_length"] = seqlen
|
||||
config["num_tokens_to_generate"] = ntok
|
||||
config["gpu_monitoring"] = monitor
|
||||
adapted_configs.append(BenchmarkConfig.from_dict(config))
|
||||
return adapted_configs
|
||||
|
||||
|
||||
def get_config_by_level(level: int) -> list[BenchmarkConfig]:
|
||||
# Create kwargs common to all configs
|
||||
kwargs = {
|
||||
"warmup_iterations": warmup_iterations,
|
||||
"measurement_iterations": measurement_iterations,
|
||||
"batch_size": batch_size,
|
||||
"sequence_length": sequence_length,
|
||||
"num_tokens_to_generate": num_tokens_to_generate,
|
||||
"gpu_monitoring": gpu_monitoring,
|
||||
}
|
||||
# Cross-generate all combinations of attn_implementation, compiled_mode, and kernelized
|
||||
configs = []
|
||||
# Early return if level is greater than 3: we generate all combinations of configs, maybe even w/ all compile modes
|
||||
if level >= 3:
|
||||
for attn_implementation, sdpa_backend in BenchmarkConfig.all_attn_implementations:
|
||||
# Usually there is not much to gain by compiling with other modes, but we allow it for level 4
|
||||
compile_modes = BenchmarkConfig.all_compiled_modes if level >= 4 else [None, "default"]
|
||||
for cm in compile_modes:
|
||||
for kernelize_on in {False, KERNELIZATION_AVAILABLE}:
|
||||
for cb_on in [False, True]:
|
||||
configs.append(
|
||||
BenchmarkConfig(
|
||||
attn_implementation=attn_implementation,
|
||||
sdpa_backend=sdpa_backend,
|
||||
compile_mode=cm,
|
||||
kernelize=kernelize_on,
|
||||
continuous_batching=cb_on,
|
||||
)
|
||||
)
|
||||
return configs
|
||||
# Otherwise, we add the configs for the given level
|
||||
if level >= 0:
|
||||
configs.append(BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default"))
|
||||
if level >= 1:
|
||||
configs.append(BenchmarkConfig(attn_implementation="flash_attention_2"))
|
||||
configs.append(BenchmarkConfig(attn_implementation="eager", compile_mode="default"))
|
||||
configs.append(BenchmarkConfig(attn_implementation="flash_attention_2", continuous_batching=True))
|
||||
if level >= 2:
|
||||
configs.append(BenchmarkConfig(attn_implementation="sdpa", compile_mode="default"))
|
||||
configs.append(BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", kernelize=True))
|
||||
configs.append(BenchmarkConfig(attn_implementation="flash_attention_2", kernelize=True))
|
||||
configs.append(BenchmarkConfig(attn_implementation="paged|sdpa", continuous_batching=True))
|
||||
for attn_implementation, sdpa_backend in list(dict.fromkeys(attn_impl_and_sdpa_backend)):
|
||||
for cm in list(dict.fromkeys(compiled_mode)):
|
||||
for kernelize_on in list(dict.fromkeys(kernelized)):
|
||||
config = BenchmarkConfig(
|
||||
attn_implementation=attn_implementation,
|
||||
sdpa_backend=sdpa_backend,
|
||||
compile_mode=cm,
|
||||
kernelize=kernelize_on,
|
||||
**kwargs,
|
||||
)
|
||||
configs.append(config)
|
||||
return configs
|
||||
|
||||
|
||||
def generate_all_configs(
|
||||
warmup_iterations: int = 5,
|
||||
measurement_iterations: int = 20,
|
||||
batch_size: int = 1,
|
||||
sequence_length: int = 128,
|
||||
num_tokens_to_generate: int = 128,
|
||||
gpu_monitoring: bool = True,
|
||||
) -> list[BenchmarkConfig]:
|
||||
all_attn_implementations = [
|
||||
("flash_attention_2", None),
|
||||
("eager", None),
|
||||
("sdpa", "math"),
|
||||
("sdpa", "flash_attention"),
|
||||
("flex_attention", None),
|
||||
]
|
||||
return cross_generate_configs(
|
||||
attn_impl_and_sdpa_backend=all_attn_implementations,
|
||||
compiled_mode=[None, "default", "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"],
|
||||
kernelized=[False, KERNELIZATION_AVAILABLE],
|
||||
warmup_iterations=warmup_iterations,
|
||||
measurement_iterations=measurement_iterations,
|
||||
batch_size=batch_size,
|
||||
sequence_length=sequence_length,
|
||||
num_tokens_to_generate=num_tokens_to_generate,
|
||||
gpu_monitoring=gpu_monitoring,
|
||||
)
|
||||
|
||||
|
||||
def generate_main_configs(
|
||||
warmup_iterations: int = 5,
|
||||
measurement_iterations: int = 20,
|
||||
batch_size: int = 1,
|
||||
sequence_length: int = 128,
|
||||
num_tokens_to_generate: int = 128,
|
||||
) -> list[BenchmarkConfig]:
|
||||
# Create kwargs common to all configs
|
||||
kwargs = {
|
||||
"warmup_iterations": warmup_iterations,
|
||||
"measurement_iterations": measurement_iterations,
|
||||
"batch_size": batch_size,
|
||||
"sequence_length": sequence_length,
|
||||
"num_tokens_to_generate": num_tokens_to_generate,
|
||||
}
|
||||
return [ # TODO: test max-autotune instead of default
|
||||
BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", gpu_monitoring=False, **kwargs),
|
||||
BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", gpu_monitoring=True, **kwargs),
|
||||
BenchmarkConfig(attn_implementation="eager", compile_mode="default", gpu_monitoring=True, **kwargs),
|
||||
BenchmarkConfig(attn_implementation="flash_attention_2", gpu_monitoring=True, **kwargs),
|
||||
]
|
||||
|
||||
@ -234,9 +234,8 @@ class BenchmarkRunner:
|
||||
self.logger.info(f"Running benchmark scenario: {config.name}")
|
||||
|
||||
# Quick validation: try one measurement first to see if this scenario works
|
||||
generate_fn = self.time_generate_batch if config.continuous_batching else self.time_generate
|
||||
flush_memory()
|
||||
e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics = generate_fn(
|
||||
e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics = self.time_generate(
|
||||
max_new_tokens=1, gpu_monitor=None
|
||||
)
|
||||
if e2e_latency < 0:
|
||||
@ -246,14 +245,14 @@ class BenchmarkRunner:
|
||||
# Warmup runs
|
||||
self.logger.info(f"Warming up with {config.warmup_iterations} iterations...")
|
||||
for _ in trange(config.warmup_iterations):
|
||||
_ = generate_fn(max_new_tokens=config.num_tokens_to_generate)
|
||||
_ = self.time_generate(max_new_tokens=config.num_tokens_to_generate)
|
||||
self.logger.info("Warmup over.")
|
||||
|
||||
# Measurement runs
|
||||
result = BenchmarkResult()
|
||||
self.logger.info(f"Benchmarking with {config.measurement_iterations} iterations.")
|
||||
for _ in trange(config.measurement_iterations):
|
||||
e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics = generate_fn(
|
||||
e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics = self.time_generate(
|
||||
max_new_tokens=config.num_tokens_to_generate,
|
||||
gpu_monitor=(GPUMonitor(logger=self.logger) if config.gpu_monitoring else None),
|
||||
)
|
||||
@ -275,58 +274,6 @@ class BenchmarkRunner:
|
||||
"config": config,
|
||||
}
|
||||
|
||||
# TODO: refactor `generate_batch` to handle streaming so we can use it here
|
||||
def time_generate_batch(
|
||||
self,
|
||||
max_new_tokens: int,
|
||||
gpu_monitor: GPUMonitor | None = None,
|
||||
) -> tuple[float, list[float], str, GPURawMetrics | None]:
|
||||
if gpu_monitor is not None:
|
||||
gpu_monitor.start()
|
||||
config = GenerationConfig(
|
||||
max_new_tokens=max_new_tokens,
|
||||
eos_token_id=self.tokenizer.eos_token_id,
|
||||
pad_token_id=self.tokenizer.pad_token_id,
|
||||
do_sample=True,
|
||||
)
|
||||
manager = self.model.init_continuous_batching(config)
|
||||
manager.start()
|
||||
try:
|
||||
first_req_results = []
|
||||
timestamps = []
|
||||
wall_time_0 = time.perf_counter()
|
||||
inputs = self.inputs["input_ids"].tolist()
|
||||
manager.add_requests(inputs, max_new_tokens=max_new_tokens, streaming=True)
|
||||
first_req_id = None
|
||||
num_requests = len(inputs)
|
||||
finished_requests = 0
|
||||
while finished_requests < num_requests:
|
||||
# NOTE: I don't like having the extra if stmt here, but hopefully won't degrade perf too much
|
||||
result = manager.get_result()
|
||||
if result:
|
||||
timestamps.append(time.perf_counter() - wall_time_0)
|
||||
if result.is_finished():
|
||||
finished_requests += 1
|
||||
if first_req_id is None:
|
||||
first_req_id = result.request_id
|
||||
if result.request_id == first_req_id:
|
||||
first_req_results.append(result)
|
||||
else:
|
||||
if not manager.is_running():
|
||||
raise RuntimeError("Generation thread exited unexpectedly")
|
||||
wall_time_1 = time.perf_counter()
|
||||
gpu_metrics = gpu_monitor.stop_and_collect() if gpu_monitor is not None else None
|
||||
decoded_output = self.tokenizer.decode(
|
||||
[res.generated_tokens[0] for res in first_req_results], skip_special_tokens=True
|
||||
)
|
||||
shape_and_decoded_output = f"{(1, len(first_req_results))} | {decoded_output}"
|
||||
e2e_latency = wall_time_1 - wall_time_0
|
||||
return e2e_latency, timestamps, shape_and_decoded_output, gpu_metrics
|
||||
except Exception as e:
|
||||
raise e
|
||||
finally:
|
||||
manager.stop()
|
||||
|
||||
def time_generate(
|
||||
self,
|
||||
max_new_tokens: int,
|
||||
@ -392,6 +339,12 @@ class BenchmarkRunner:
|
||||
|
||||
n_configs = len(benchmark_configs)
|
||||
for i, config in enumerate(benchmark_configs):
|
||||
# Handle SDPA backend if not determined by the config (needs to be done before skipping duplicates)
|
||||
if config.attn_implementation == "sdpa" and config.sdpa_backend is None:
|
||||
default_backend = "flash_attention" # FIXME: torch has a _cur_sdpa_kernel_backends but it fails
|
||||
self.logger.warning(f"No SDPA backend provided, using {default_backend} instead.")
|
||||
config.sdpa_backend = default_backend
|
||||
|
||||
# Skip if already run
|
||||
if config.hash in all_results:
|
||||
self.logger.info(f"Skipping duplicate config {config.name} for model {model_id} ({i + 1}/{n_configs})")
|
||||
@ -415,27 +368,21 @@ class BenchmarkRunner:
|
||||
self.cleanup()
|
||||
self.save_results(model_id, all_results, timestamp=timestamp)
|
||||
|
||||
if len(all_results) < 1:
|
||||
raise RuntimeError("No benchmark was run succesfully")
|
||||
|
||||
if pretty_print_summary:
|
||||
print()
|
||||
print("=" * 100)
|
||||
print(f"Finished benchmarks in {time.perf_counter() - start_time:.2f} seconds")
|
||||
print(f"Total number of benchmarks: {len(all_results)}")
|
||||
print("First run metadata:")
|
||||
first_key = list(all_results.keys())[0]
|
||||
first_metadata = all_results[first_key]["metadata"].to_dict()
|
||||
hardware_info = first_metadata.pop("hardware_info")
|
||||
pretty_print_dict(first_metadata | hardware_info, tabs=1)
|
||||
if len(all_results) > 0:
|
||||
print("First run metadata:")
|
||||
first_key = list(all_results.keys())[0]
|
||||
first_metadata = all_results[first_key]["metadata"].to_dict()
|
||||
hardware_info = first_metadata.pop("hardware_info")
|
||||
pretty_print_dict(first_metadata | hardware_info, tabs=1)
|
||||
for result in all_results.values():
|
||||
print("=" * 100)
|
||||
print(f"Config: {result['config'].infer_name(compact=False)}\n")
|
||||
result["measurements"].pprint(
|
||||
batch_size=result["config"].batch_size,
|
||||
num_generated_tokens=result["config"].num_tokens_to_generate,
|
||||
tabs=1,
|
||||
)
|
||||
result["measurements"].pprint(batch_size=result["config"].batch_size, tabs=1)
|
||||
print("=" * 100)
|
||||
|
||||
return (timestamp, all_results)
|
||||
|
||||
@ -36,17 +36,16 @@ def add_unit_to_duration(stats: dict[str, float]) -> dict[str, str]:
|
||||
return stats
|
||||
|
||||
|
||||
def equalize_lengths_and_collate(stats: dict[str, dict[str, str]]) -> dict[str, str]:
|
||||
"""Note: This operation is destructive as it will update values in place before returning a new correctly formatted dict"""
|
||||
def equalize_lengths_and_collate(stats: list[dict[str, str]]) -> list[str]:
|
||||
keys = ["avg", "std", "min", "med", "max", "p95"]
|
||||
for key in keys:
|
||||
max_length = max(len(stat[key]) for stat in stats.values())
|
||||
for stat in stats.values():
|
||||
max_length = max(len(stat[key]) for stat in stats)
|
||||
for stat in stats:
|
||||
stat[key] = stat[key].ljust(max_length, " ")
|
||||
return {name: " ".join([f"{key}={stat[key]}" for key in keys]) for name, stat in stats.items()}
|
||||
return [" ".join([f"{key}={stat[key]}" for key in keys]) for stat in stats]
|
||||
|
||||
|
||||
def pretty_print_dict(data: dict[str, str], tabs: int = 0) -> None:
|
||||
def pretty_print_dict(data: dict[str, Any], tabs: int = 0) -> None:
|
||||
max_key_length = max([len(key) for key in data.keys()])
|
||||
for key, value in data.items():
|
||||
tabs_str = " " * tabs
|
||||
@ -142,19 +141,27 @@ class BenchmarkResult:
|
||||
def get_measured_itl(self) -> list[float]:
|
||||
return [(dt[-1] - dt[0]) / (len(dt) - 1) for dt in self.token_generation_times if len(dt) > 1]
|
||||
|
||||
def get_throughput(self, total_generated_tokens: int) -> list[float]:
|
||||
return [total_generated_tokens / e2e_latency for e2e_latency in self.e2e_latency]
|
||||
def get_throughput(self, batch_size: int) -> float:
|
||||
return [
|
||||
batch_size * len(dt) / e2e_latency
|
||||
for e2e_latency, dt in zip(self.e2e_latency, self.token_generation_times)
|
||||
]
|
||||
|
||||
def pprint(self, batch_size: int = 0, num_generated_tokens: int = 0, tabs: int = 0) -> None:
|
||||
measurements = {
|
||||
"E2E Latency": add_unit_to_duration(compute_basic_statistics(self.e2e_latency)),
|
||||
"Time to First Token": add_unit_to_duration(compute_basic_statistics(self.get_measured_ttft())),
|
||||
}
|
||||
itl_values = self.get_measured_itl()
|
||||
if len(itl_values) > 0:
|
||||
measurements["Inter-Token Latency"] = add_unit_to_duration(compute_basic_statistics(itl_values))
|
||||
def pprint(self, batch_size: int = 0, tabs: int = 0) -> None:
|
||||
stats_to_collate = [
|
||||
add_unit_to_duration(compute_basic_statistics(self.e2e_latency)),
|
||||
add_unit_to_duration(compute_basic_statistics(self.get_measured_ttft())),
|
||||
add_unit_to_duration(compute_basic_statistics(self.get_measured_itl())),
|
||||
]
|
||||
if batch_size > 0:
|
||||
throughput_stats = compute_basic_statistics(self.get_throughput(batch_size * num_generated_tokens))
|
||||
measurements["Throughput"] = {key: f"{value:.2f}tok/s" for key, value in throughput_stats.items()}
|
||||
dict_to_pprint = equalize_lengths_and_collate(measurements)
|
||||
throughput_stats = compute_basic_statistics(self.get_throughput(batch_size))
|
||||
stats_to_collate.append({key: f"{value:.2f}tok/s" for key, value in throughput_stats.items()})
|
||||
collated_stats = equalize_lengths_and_collate(stats_to_collate)
|
||||
dict_to_pprint = {
|
||||
"E2E Latency": collated_stats[0],
|
||||
"Time to First Token": collated_stats[1],
|
||||
"Inter-Token Latency": collated_stats[2],
|
||||
}
|
||||
if batch_size > 0:
|
||||
dict_to_pprint["Throughput"] = collated_stats[3]
|
||||
pretty_print_dict(dict_to_pprint, tabs=tabs)
|
||||
|
||||
@ -2,5 +2,6 @@ numpy>=1.21.0
|
||||
psutil>=5.8.0
|
||||
gpustat>=1.0.0
|
||||
torch>=2.0.0
|
||||
transformers>=4.30.0
|
||||
datasets>=2.10.0
|
||||
huggingface_hub>=0.16.0
|
||||
|
||||
@ -23,7 +23,7 @@ import logging
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
from framework.benchmark_config import adapt_configs, get_config_by_level
|
||||
from framework.benchmark_config import BenchmarkConfig, generate_all_configs, generate_main_configs
|
||||
from framework.benchmark_runner import BenchmarkRunner
|
||||
|
||||
|
||||
@ -40,14 +40,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--sequence-length", "-s", type=int, nargs="+", help="Sequence length")
|
||||
parser.add_argument("--num-tokens-to-generate", "-n", type=int, nargs="+", help="Number of tokens to generate")
|
||||
|
||||
parser.add_argument(
|
||||
"--level",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Level of coverage for the benchmark. 0: only the main config, 1: a few important configs, 2: a config for"
|
||||
" each attn implementation an option, 3: cross-generate all combinations of configs, 4: cross-generate all"
|
||||
" combinations of configs w/ all compile modes",
|
||||
)
|
||||
parser.add_argument("--cross-generate", action="store_true", help="Cross-generate all combinations of configs")
|
||||
parser.add_argument("--num-tokens-to-profile", "-p", type=int, default=0, help="Number of tokens to profile")
|
||||
|
||||
parser.add_argument("--branch-name", type=str, help="Git branch name")
|
||||
@ -80,34 +73,70 @@ if __name__ == "__main__":
|
||||
logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
|
||||
logger.info(f"Output directory: {args.output_dir}")
|
||||
|
||||
# We cannot compute ITL if we don't have at least two measurements
|
||||
if any(n <= 1 for n in args.num_tokens_to_generate):
|
||||
raise ValueError("--num_tokens_to_generate arguments should be larger than 1")
|
||||
|
||||
# Error out if one of the arguments is not provided
|
||||
if len(args.batch_size) * len(args.sequence_length) * len(args.num_tokens_to_generate) == 0:
|
||||
raise ValueError(
|
||||
"At least one of the arguments --batch-size, --sequence-length, or --num-tokens-to-generate is required"
|
||||
)
|
||||
|
||||
# Get the configs for the given coverage level
|
||||
configs = get_config_by_level(args.level)
|
||||
# Adapt the configs to the given arguments
|
||||
configs = adapt_configs(
|
||||
configs,
|
||||
args.warmup,
|
||||
args.iterations,
|
||||
args.batch_size,
|
||||
args.sequence_length,
|
||||
args.num_tokens_to_generate,
|
||||
not args.no_gpu_monitoring,
|
||||
)
|
||||
# If there is only one (batch_size, sequence_length, num_tokens_to_generate), we benchmark across configs
|
||||
elif len(args.batch_size) * len(args.sequence_length) * len(args.num_tokens_to_generate) == 1:
|
||||
if args.cross_generate:
|
||||
benchmark_configs = generate_all_configs(
|
||||
warmup_iterations=args.warmup,
|
||||
measurement_iterations=args.iterations,
|
||||
batch_size=args.batch_size[0],
|
||||
sequence_length=args.sequence_length[0],
|
||||
num_tokens_to_generate=args.num_tokens_to_generate[0],
|
||||
gpu_monitoring=not args.no_gpu_monitoring,
|
||||
)
|
||||
else:
|
||||
benchmark_configs = generate_main_configs(
|
||||
warmup_iterations=args.warmup,
|
||||
measurement_iterations=args.iterations,
|
||||
batch_size=args.batch_size[0],
|
||||
sequence_length=args.sequence_length[0],
|
||||
num_tokens_to_generate=args.num_tokens_to_generate[0],
|
||||
)
|
||||
|
||||
runner = BenchmarkRunner(logger, args.output_dir, args.branch_name, args.commit_id, args.commit_message)
|
||||
# Otherwise, we benchmark across all combinations of dimensions
|
||||
else:
|
||||
main_config = generate_main_configs(
|
||||
warmup_iterations=args.warmup,
|
||||
measurement_iterations=args.iterations,
|
||||
batch_size=args.batch_size[0],
|
||||
sequence_length=args.sequence_length[0],
|
||||
num_tokens_to_generate=args.num_tokens_to_generate[0],
|
||||
)[0]
|
||||
benchmark_configs = []
|
||||
for num_tokens_to_generate in args.num_tokens_to_generate:
|
||||
for sequence_length in args.sequence_length:
|
||||
for batch_size in args.batch_size:
|
||||
cfg_dict = main_config.to_dict()
|
||||
cfg_dict["batch_size"] = batch_size
|
||||
cfg_dict["sequence_length"] = sequence_length
|
||||
cfg_dict["num_tokens_to_generate"] = num_tokens_to_generate
|
||||
cfg_dict.pop("name")
|
||||
benchmark_configs.append(BenchmarkConfig.from_dict(cfg_dict))
|
||||
|
||||
runner = BenchmarkRunner(
|
||||
logger,
|
||||
args.output_dir,
|
||||
args.branch_name,
|
||||
args.commit_id,
|
||||
args.commit_message,
|
||||
)
|
||||
timestamp, results = runner.run_benchmarks(
|
||||
args.model_id, configs, args.num_tokens_to_profile, pretty_print_summary=True
|
||||
args.model_id,
|
||||
benchmark_configs,
|
||||
args.num_tokens_to_profile,
|
||||
pretty_print_summary=True,
|
||||
)
|
||||
|
||||
dataset_id = args.push_result_to_dataset
|
||||
if dataset_id is not None and len(results) > 0:
|
||||
runner.push_results_to_hub(dataset_id, results, timestamp)
|
||||
runner.push_results_to_hub(
|
||||
dataset_id,
|
||||
results,
|
||||
timestamp,
|
||||
)
|
||||
|
||||
@ -87,8 +87,6 @@ def pytest_configure(config):
|
||||
config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")
|
||||
config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality")
|
||||
config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality")
|
||||
config.addinivalue_line("markers", "flash_attn_test: mark test which tests flash attention functionality")
|
||||
config.addinivalue_line("markers", "flash_attn_3_test: mark test which tests flash attention 3 functionality")
|
||||
|
||||
os.environ["DISABLE_SAFETENSORS_CONVERSION"] = "true"
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ ARG REF=main
|
||||
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip install uv && uv pip install --no-cache-dir -U pip setuptools GitPython
|
||||
RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir --upgrade 'torch<2.9' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir pypi-kenlm
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[quality,testing,torch-speech,vision]"
|
||||
RUN git lfs install
|
||||
|
||||
@ -17,7 +17,7 @@ RUN make install -j 10
|
||||
|
||||
WORKDIR /
|
||||
|
||||
RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache --upgrade 'torch<2.9' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,spacy,ftfy,rjieba]" unidic unidic-lite
|
||||
# spacy is not used so not tested. Causes to failures. TODO fix later
|
||||
|
||||
@ -5,7 +5,7 @@ USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec<0.8' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ USER root
|
||||
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1 g++ tesseract-ocr git-lfs curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir --no-deps timm accelerate
|
||||
RUN uv pip install -U --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
|
||||
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
|
||||
|
||||
@ -5,7 +5,7 @@ USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec<0.8' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ USER root
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
|
||||
ENV UV_PYTHON=/usr/local/bin/python
|
||||
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
|
||||
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec<0.8' --index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
|
||||
|
||||
|
||||
@ -9,15 +9,10 @@ SHELL ["sh", "-lc"]
|
||||
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
|
||||
# to be used as arguments for docker build (so far).
|
||||
|
||||
ARG PYTORCH='2.9.0'
|
||||
ARG PYTORCH='2.8.0'
|
||||
# Example: `cu102`, `cu113`, etc.
|
||||
ARG CUDA='cu126'
|
||||
|
||||
# This needs to be compatible with the above `PYTORCH`.
|
||||
ARG TORCHCODEC='0.8.0'
|
||||
|
||||
ARG FLASH_ATTN='false'
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
|
||||
RUN git lfs install
|
||||
@ -26,48 +21,15 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
|
||||
ARG REF=main
|
||||
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev]
|
||||
|
||||
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
|
||||
# 2. For `torchcodec`, use `cpu` as we don't have `libnvcuvid.so` on the host runner. See https://github.com/meta-pytorch/torchcodec/issues/912
|
||||
# **Important**: We need to specify `torchcodec` version if the torch version is not the latest stable one.
|
||||
# 3. `set -e` means "exit immediately if any command fails".
|
||||
RUN set -e; \
|
||||
# Determine torch version
|
||||
if [ ${#PYTORCH} -gt 0 ] && [ "$PYTORCH" != "pre" ]; then \
|
||||
VERSION="torch==${PYTORCH}.*"; \
|
||||
TORCHCODEC_VERSION="torchcodec==${TORCHCODEC}.*"; \
|
||||
else \
|
||||
VERSION="torch"; \
|
||||
TORCHCODEC_VERSION="torchcodec"; \
|
||||
fi; \
|
||||
\
|
||||
# Log the version being installed
|
||||
echo "Installing torch version: $VERSION"; \
|
||||
\
|
||||
# Install PyTorch packages
|
||||
if [ "$PYTORCH" != "pre" ]; then \
|
||||
python3 -m pip install --no-cache-dir -U \
|
||||
$VERSION \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
--extra-index-url https://download.pytorch.org/whl/$CUDA; \
|
||||
# We need to specify the version if the torch version is not the latest stable one.
|
||||
python3 -m pip install --no-cache-dir -U \
|
||||
$TORCHCODEC_VERSION --extra-index-url https://download.pytorch.org/whl/cpu; \
|
||||
else \
|
||||
python3 -m pip install --no-cache-dir -U --pre \
|
||||
torch \
|
||||
torchvision \
|
||||
torchaudio \
|
||||
--extra-index-url https://download.pytorch.org/whl/nightly/$CUDA; \
|
||||
python3 -m pip install --no-cache-dir -U --pre \
|
||||
torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/cpu; \
|
||||
fi
|
||||
# 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
|
||||
# Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
|
||||
# 3. For `torchcodec<0.8`: this is quickly added as torch 2.9.0 + torchcodec 0.8.0 fails on our CI env. Need to remove later once they work.
|
||||
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio "torchcodec<0.8" --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir -U timm
|
||||
|
||||
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir --no-build-isolation git+https://github.com/facebookresearch/detectron2.git || echo "Don't install detectron2 with nightly torch"
|
||||
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git || echo "Don't install detectron2 with nightly torch"
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir pytesseract
|
||||
|
||||
@ -92,7 +54,7 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
|
||||
RUN python3 -m pip install --no-cache-dir quanto
|
||||
|
||||
# After using A10 as CI runner, let's run FA2 tests
|
||||
RUN [ "$FLASH_ATTN" != "false" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"
|
||||
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"
|
||||
|
||||
# TODO (ydshieh): check this again
|
||||
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
|
||||
|
||||
@ -10,7 +10,7 @@ RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y te
|
||||
# Torch needs to be installed before deepspeed
|
||||
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --no-build-isolation torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
|
||||
RUN python3 -m pip install -U "itsdangerous<2.1.0"
|
||||
|
||||
# Test if the image could successfully build the doc. before publishing the image
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
FROM rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.7.1
|
||||
FROM rocm/pytorch:rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.7.1
|
||||
LABEL maintainer="Hugging Face"
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
@ -10,8 +10,8 @@ RUN apt update && \
|
||||
|
||||
RUN git lfs install
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy importlib-metadata setuptools wheel ninja pytesseract "itsdangerous<2.1.0"
|
||||
RUN python3 -m pip install --no-cache-dir --no-build-isolation git+https://github.com/facebookresearch/detectron2.git
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
|
||||
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
|
||||
|
||||
ARG REF=main
|
||||
WORKDIR /
|
||||
@ -39,7 +39,6 @@ RUN python3 -m pip install --no-cache-dir "torchcodec==0.5"
|
||||
# Install flash attention from source. Tested with commit 6387433156558135a998d5568a9d74c1778666d8
|
||||
RUN git clone https://github.com/ROCm/flash-attention/ -b tridao && \
|
||||
cd flash-attention && \
|
||||
GPU_ARCHS="gfx942" python setup.py install
|
||||
# GPU_ARCHS builds for MI300, MI325 but not MI355: we would need to add `;gfx950` but it takes too long to build.
|
||||
GPU_ARCHS="gfx942" python setup.py install
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir einops
|
||||
|
||||
@ -29,7 +29,7 @@ RUN python3 -m pip uninstall -y apex torch torchvision torchaudio
|
||||
RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM --no-cache-dir
|
||||
|
||||
# Pre-build DeepSpeed, so it's be ready for testing (to avoid timeout)
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --no-build-isolation --config-settings="--build-option=build_ext" --config-settings="--build-option=-j8" --no-cache-dir -v --disable-pip-version-check 2>&1
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache-dir -v --disable-pip-version-check 2>&1
|
||||
|
||||
ARG REF=main
|
||||
WORKDIR /
|
||||
|
||||
@ -21,7 +21,7 @@ RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'p
|
||||
# Install latest release PyTorch
|
||||
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
|
||||
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio torchcodec && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
@ -43,7 +43,7 @@ RUN python3 -m pip uninstall -y deepspeed
|
||||
# This has to be run (again) inside the GPU VMs running the tests.
|
||||
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
|
||||
# TODO: Find out why test fail.
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --no-build-isolation --config-settings="--build-option=build_ext" --config-settings="--build-option=-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
|
||||
|
||||
# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
|
||||
RUN python3 -m pip uninstall -y kernels
|
||||
|
||||
@ -24,7 +24,7 @@ RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch';
|
||||
RUN echo torch=$VERSION
|
||||
# `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
|
||||
# Currently, let's just use their latest releases (when `torch` is installed with a release version)
|
||||
RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
|
||||
|
||||
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
|
||||
|
||||
@ -50,7 +50,7 @@ RUN python3 -m pip install --no-cache-dir hqq
|
||||
RUN python3 -m pip install --no-cache-dir gguf
|
||||
|
||||
# Add autoawq for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir --no-build-isolation autoawq[kernels]
|
||||
RUN python3 -m pip install --no-cache-dir autoawq[kernels]
|
||||
|
||||
# Add quanto for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir optimum-quanto
|
||||
@ -81,7 +81,7 @@ RUN python3 -m pip uninstall -y flash-attn
|
||||
RUN cd transformers && python3 setup.py develop
|
||||
|
||||
# Add fp-quant for quantization testing
|
||||
RUN python3 -m pip install --no-cache-dir "fp-quant>=0.3.2"
|
||||
RUN python3 -m pip install --no-cache-dir "fp-quant>=0.2.0"
|
||||
|
||||
# Low usage or incompatible lib, will enable later on
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@ pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> This command might fail for some OS that are missing dependencies. Check step 4 in [Create a Pull Request](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#create-a-pull-request) to work around it.
|
||||
> This command might fail for some OS that are missing dependencies. Check step 4 in [Create a Pull Request](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#create-a-pull-request) to workaround it.
|
||||
|
||||
Then you need to install our special tool that builds the documentation:
|
||||
|
||||
@ -38,7 +38,7 @@ pip install git+https://github.com/huggingface/doc-builder
|
||||
|
||||
## Building the documentation
|
||||
|
||||
Once you have set up the `doc-builder` and additional packages, you can generate the documentation by
|
||||
Once you have setup the `doc-builder` and additional packages, you can generate the documentation by
|
||||
typing the following command:
|
||||
|
||||
```bash
|
||||
@ -295,11 +295,12 @@ Here's an example of a tuple return, comprising several objects:
|
||||
Due to the rapidly growing repository, it is important to make sure that no files that would significantly weigh down the repository are added. This includes images, videos, and other non-text files. We prefer to leverage a hf.co hosted `dataset` like
|
||||
the ones hosted on [`hf-internal-testing`](https://huggingface.co/hf-internal-testing) in which to place these files and reference
|
||||
them by URL. We recommend putting them in the following dataset: [huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images).
|
||||
If an external contribution, feel free to add the images to your PR and ask a Hugging Face member to migrate them to this dataset.
|
||||
If an external contribution, feel free to add the images to your PR and ask a Hugging Face member to migrate your images
|
||||
to this dataset.
|
||||
|
||||
## Styling the docstring
|
||||
|
||||
We have an automatic script running with the `make style` command that will make sure that:
|
||||
We have an automatic script running with the `make style` comment that will make sure that:
|
||||
- the docstrings fully take advantage of the line width
|
||||
- all code examples are formatted using black, like the code of the Transformers library
|
||||
|
||||
|
||||
@ -258,6 +258,8 @@
|
||||
# title: النماذج
|
||||
# - local: main_classes/text_generation
|
||||
# title: توليد النصوص
|
||||
# - local: main_classes/onnx
|
||||
# title: ONNX
|
||||
# - local: main_classes/optimizer_schedules
|
||||
# title: التحسين
|
||||
# - local: main_classes/output
|
||||
|
||||
@ -508,16 +508,16 @@ BERT `_init_weights` Methode:
|
||||
def _init_weights(self, module):
|
||||
"""Initialize the weights"""
|
||||
if isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
elif isinstance(module, nn.Embedding):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.padding_idx is not None:
|
||||
module.weight.data[module.padding_idx].zero_()
|
||||
elif isinstance(module, nn.LayerNorm):
|
||||
module.bias.zero_()
|
||||
module.weight.fill_(1.0)
|
||||
module.bias.data.zero_()
|
||||
module.weight.data.fill_(1.0)
|
||||
```
|
||||
|
||||
Sie können weitere benutzerdefinierte Schemata verwenden, wenn Sie eine spezielle Initialisierung für einige Module benötigen. Zum Beispiel in
|
||||
@ -533,9 +533,9 @@ def _init_weights(self, module):
|
||||
module.project_hid._is_hf_initialized = True
|
||||
module.project_q._is_hf_initialized = True
|
||||
elif isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
```
|
||||
|
||||
Das Flag `_is_hf_initialized` wird intern verwendet, um sicherzustellen, dass wir ein Submodul nur einmal initialisieren. Wenn Sie es auf
|
||||
|
||||
@ -119,8 +119,6 @@
|
||||
title: Tools
|
||||
- local: transformers_as_backend
|
||||
title: Inference server backends
|
||||
- local: continuous_batching
|
||||
title: Continuous Batching
|
||||
title: Inference
|
||||
- isExpanded: false
|
||||
sections:
|
||||
|
||||
@ -314,16 +314,16 @@ Random initialization occurs in the `_init_weights` method of `BrandNewLlamaPreT
|
||||
def _init_weights(self, module):
|
||||
"""Initialize the weights"""
|
||||
if isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
elif isinstance(module, nn.Embedding):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.padding_idx is not None:
|
||||
module.weight.data[module.padding_idx].zero_()
|
||||
elif isinstance(module, nn.LayerNorm):
|
||||
module.bias.zero_()
|
||||
module.weight.fill_(1.0)
|
||||
module.bias.data.zero_()
|
||||
module.weight.data.fill_(1.0)
|
||||
```
|
||||
|
||||
The initialization scheme can look different if you need to adapt it to your model. For example, [`Wav2Vec2ForPreTraining`] initializes [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) in its last two linear layers.
|
||||
@ -339,9 +339,9 @@ def _init_weights(self, module):
|
||||
module.project_hid._is_hf_initialized = True
|
||||
module.project_q._is_hf_initialized = True
|
||||
elif isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
```
|
||||
|
||||
### Convert checkpoints to Transformers
|
||||
|
||||
@ -1,194 +0,0 @@
|
||||
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
# Continuous Batching
|
||||
|
||||
Continuous Batching (CB) is an advanced technique to optimize the inference of transformer models by dynamically grouping multiple requests into batches. This approach maximizes GPU utilization and throughput, specifically for workloads with many variable-length inputs.
|
||||
|
||||
We are particularly interested in having Continuous Batching in transformers for the following use cases:
|
||||
- Evaluation of models on large datasets with variable-length inputs
|
||||
- Generating outputs for multiple sequences for GRPO policies
|
||||
|
||||
CB is what makes inference engines like vLLM or SGLang efficient. That being said, transformers does not aim to be a production-ready inference engine, but a complete framework for model development. For this reason, CB is available in `transformers serve`.
|
||||
|
||||
If you are not familiar with some of the core concepts CB is built upon, we invite you to read the associated blog post: [Continuous Batching: Efficient Inference for Large Language Models](https://huggingface.co/blog/continuous-batching). _broken link for now_
|
||||
|
||||
## API Reference
|
||||
|
||||
## Usage Examples
|
||||
|
||||
The main way to use CB in transformers is via the `generate_batch` method.
|
||||
|
||||
Unlike `generate`, CB takes already tokenized inputs, known as input IDs. Each sequence of input IDs is represented as a list of integers, in python: `list[int]`. Since
|
||||
|
||||
For a more detailed example, please refer to: [examples/continuous_batching](./path/to/example)
|
||||
|
||||
### `generate_batch` example
|
||||
|
||||
We have created a `ContinuousMixin` that is inherited by the `GenerationMixin` so that all auto regressive text models support CB.
|
||||
|
||||
This adds the `generate_batch` method to all models that inherit from `GenerationMixin`.
|
||||
|
||||
You can use it as follows:
|
||||
|
||||
```py
|
||||
import datasets
|
||||
import torch
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from transformers.generation import GenerationConfig
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"Qwen/Qwen3-4B-Instruct-2507",
|
||||
attn_implementation="spda_paged",
|
||||
device_map="cuda", # if you need cuda
|
||||
dtype=torch.bfloat16,
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side="left")
|
||||
|
||||
# prepare a batch of inputs
|
||||
dataset = datasets.load_dataset("openai/gsm8k", "socratic", split="test")
|
||||
dataset = dataset.select(range(args.samples))
|
||||
tokenized_datasets = dataset.map(lambda x: tokenizer(x["question"]), batched=True)
|
||||
simple_batch_inputs = [item["input_ids"] for item in tokenized_datasets]
|
||||
|
||||
generation_config = GenerationConfig(
|
||||
max_new_tokens=32,
|
||||
use_cuda_graph=False, # Not supported for simple version
|
||||
eos_token_id=tokenizer.eos_token_id,
|
||||
pad_token_id=tokenizer.pad_token_id,
|
||||
do_sample=False,
|
||||
max_batch_tokens=512, # max number of tokens in a batch, this is just a default value you should tune based on your hardware
|
||||
)
|
||||
|
||||
batch_outputs = model.generate_batch(
|
||||
inputs=simple_batch_inputs,
|
||||
generation_config=generation_config,
|
||||
)
|
||||
|
||||
for request_id, output in batch_outputs.items():
|
||||
generated_text = tokenizer.decode(output.generated_tokens, skip_special_tokens=True)
|
||||
print(f"Request {request_id} output: {generated_text}")
|
||||
```
|
||||
|
||||
### `ContinuousBatchingManager` example
|
||||
|
||||
If you want more control w.r.t. how you want to schedule requests using CB, you can use the `ContinuousBatchingManager` class directly.
|
||||
|
||||
This is what we use in `transformers serve` because requests arrive asynchronously and we can leverage the asynchronous nature of the CB process to make things more efficient.
|
||||
|
||||
Under the hood, the `ContinuousBatchingManager` creates a background thread that receives inputs from a python `queue.Queue` which it uses to get requests to batch in each forward pass.
|
||||
|
||||
Note that the manager is thread safe!
|
||||
|
||||
```py
|
||||
import datasets
|
||||
import torch
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from transformers.generation import GenerationConfig
|
||||
from transformers.generation.continuous_batching import RequestStatus
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"Qwen/Qwen3-4B-Instruct-2507",
|
||||
attn_implementation="spda_paged",
|
||||
device_map="cuda", # if you need cuda
|
||||
dtype=torch.bfloat16,
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, padding_side="left")
|
||||
|
||||
# prepare a batch of inputs
|
||||
dataset = datasets.load_dataset("openai/gsm8k", "socratic", split="test")
|
||||
dataset = dataset.select(range(args.samples))
|
||||
tokenized_datasets = dataset.map(lambda x: tokenizer(x["question"]), batched=True)
|
||||
simple_batch_inputs = [item["input_ids"] for item in tokenized_datasets]
|
||||
|
||||
# initialize the manager, available method thanks to the `ContinuousMixin`
|
||||
manager = model.init_continuous_batching(generation_config=generation_config)
|
||||
|
||||
# start the background thread
|
||||
manager.start()
|
||||
|
||||
# this is for demonstration purposes only, in practice this is most useful to do concurrently
|
||||
for i, input in enumerate(simple_batch_inputs):
|
||||
request_id = manager.add_request(input_ids=input, request_id=f"request_{i}") # if you do not specify a request_id, one will be generated for you
|
||||
|
||||
# Can be done in an other thread
|
||||
for id, request in manager.get_result():
|
||||
generated_text = tokenizer.decode(request.generated_tokens, skip_special_tokens=True)
|
||||
print(f"Request {id} output: {generated_text}")
|
||||
|
||||
# you can also get results for a specific request id
|
||||
result = manager.get_result(request_id="request_5") # this is blocking and will wait for the result to be ready
|
||||
|
||||
# or get results for a request that is streaming
|
||||
manager.add_request(
|
||||
input_ids=input,
|
||||
request_id="streaming_request",
|
||||
stream=True,
|
||||
)
|
||||
for chunk in manager.request_id_iter(request_id="streaming_request"):
|
||||
generated_text = tokenizer.decode(chunk.generated_tokens, skip_special_tokens=True)
|
||||
print(generated_text)
|
||||
# FIXME: stop iteration in `request_id_iter` when finished instead of doing it externally
|
||||
if chunk.status == RequestStatus.FINISHED:
|
||||
break
|
||||
|
||||
# stop the background thread before exiting the process
|
||||
manager.stop()
|
||||
```
|
||||
|
||||
## Supported & Unsupported Features
|
||||
|
||||
### Supported Features
|
||||
|
||||
- Dynamic scheduling of variable-length requests
|
||||
- Chunked prefill
|
||||
- Paged Attention Cache
|
||||
- Sliding window attention
|
||||
- Chat templates
|
||||
|
||||
### Unsupported Features
|
||||
|
||||
At the moment, the following features are not supported with CB. We plan to add support to the following:
|
||||
|
||||
- Prefix caching
|
||||
- Beam search
|
||||
- tool calling
|
||||
|
||||
The others are unplanned, but depending on community requests we might consider adding them:
|
||||
|
||||
- MTP (multi token prediction)
|
||||
- Medusa
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
|
||||
## Integration with Serving
|
||||
|
||||
You can use CB in `transformers serve` by passing the `--continuous-batching` flag when starting the server.
|
||||
|
||||
## Monitoring
|
||||
|
||||
We have added `opentelemetry` support to Continuous Batching to help you monitor its performance in production. To enable it, you need to install the `opentelemetry` extra when installing `transformers`:
|
||||
|
||||
```sh
|
||||
# this installs `opentelemetry-api`, `opentelemetry-sdk` and `opentelemetry-exporter-otlp`
|
||||
pip install transformers[open-telemetry]
|
||||
```
|
||||
|
||||
This will enable traces and metrics collection in CB. You will then have to setup the backend to collect and visualize the traces and metrics.
|
||||
|
||||
@ -393,9 +393,3 @@ model = AutoModelForCausalLM.from_pretrained(
|
||||
"mistralai/Mistral-7B-v0.1", quantization_config=quant_config, device_map="auto"
|
||||
)
|
||||
```
|
||||
|
||||
## Continuous Batching
|
||||
|
||||
When serving LLMs for inference, you may have multiple requests arriving at different times. Continuous Batching (CB) is a technique that groups incoming requests into batches to maximize GPU utilization and throughput.
|
||||
|
||||
See the [Continuous Batching](./continuous_batching) guide for more details on how to use CB in transformers.
|
||||
|
||||
@ -158,24 +158,6 @@ print("Retrieval scores (query x image):")
|
||||
print(scores)
|
||||
```
|
||||
|
||||
You can also use checkpoints for `ColQwen2.5` that are **compatible with the ColQwen2 architecture**. This version of the model uses [Qwen2_5_VL](./qwen2_5_vl) as the backbone.
|
||||
|
||||
```python
|
||||
import torch
|
||||
from transformers import ColQwen2ForRetrieval, ColQwen2Processor
|
||||
from transformers.utils.import_utils import is_flash_attn_2_available
|
||||
|
||||
model_name = "Sahil-Kabir/colqwen2.5-v0.2-hf" # An existing compatible checkpoint
|
||||
|
||||
model = ColQwen2ForRetrieval.from_pretrained(
|
||||
model_name,
|
||||
dtype=torch.bfloat16,
|
||||
device_map="auto",
|
||||
attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa"
|
||||
)
|
||||
processor = ColQwen2Processor.from_pretrained(model_name)
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- [`~ColQwen2Processor.score_retrieval`] returns a 2D tensor where the first dimension is the number of queries and the second dimension is the number of images. A higher score indicates more similarity between the query and image.
|
||||
|
||||
@ -169,9 +169,6 @@ print("Pooled output shape:", pooled_output.shape)
|
||||
[[autodoc]] DINOv3ViTModel
|
||||
- forward
|
||||
|
||||
## DINOv3ViTBackbone
|
||||
[[autodoc]] DINOv3ViTBackbone
|
||||
|
||||
## DINOv3ConvNextModel
|
||||
|
||||
[[autodoc]] DINOv3ConvNextModel
|
||||
|
||||
@ -75,11 +75,11 @@ A processor requires an image_processor and a tokenizer. Hence, inputs can be lo
|
||||
from PIL import Image
|
||||
from transformers import AutoTokenizer
|
||||
from transformers.models.fuyu.processing_fuyu import FuyuProcessor
|
||||
from transformers.models.fuyu.image_processing_fuyu_fast import FuyuImageProcessorFast
|
||||
from transformers.models.fuyu.image_processing_fuyu import FuyuImageProcessor
|
||||
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained('adept-hf-collab/fuyu-8b')
|
||||
image_processor = FuyuImageProcessorFast()
|
||||
image_processor = FuyuImageProcessor()
|
||||
|
||||
|
||||
processor = FuyuProcessor(image_processor=image_processor, tokenizer=tokenizer)
|
||||
@ -118,11 +118,6 @@ The `LlamaTokenizer` is used as it is a standard wrapper around sentencepiece.
|
||||
[[autodoc]] FuyuImageProcessor
|
||||
- __call__
|
||||
|
||||
## FuyuImageProcessor
|
||||
|
||||
[[autodoc]] FuyuImageProcessorFast
|
||||
- __call__
|
||||
|
||||
## FuyuProcessor
|
||||
|
||||
[[autodoc]] FuyuProcessor
|
||||
|
||||
@ -61,11 +61,6 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
|
||||
[[autodoc]] GLPNImageProcessor
|
||||
- preprocess
|
||||
|
||||
## GLPNImageProcessorFast
|
||||
|
||||
[[autodoc]] GLPNImageProcessorFast
|
||||
- preprocess
|
||||
|
||||
## GLPNModel
|
||||
|
||||
[[autodoc]] GLPNModel
|
||||
|
||||
@ -159,7 +159,7 @@ conversation3 = [
|
||||
|
||||
conversations = [conversation1, conversation2, conversation3]
|
||||
inputs = processor.apply_chat_template(
|
||||
conversations,
|
||||
conversation,
|
||||
add_generation_prompt=True,
|
||||
tokenize=True,
|
||||
return_dict=True,
|
||||
|
||||
@ -154,7 +154,7 @@ pip install schedulefree
|
||||
|
||||
[Schedule Free optimizer (SFO)](https://hf.co/papers/2405.15682) replaces the base optimizers momentum with a combination of averaging and interpolation. Unlike a traditional scheduler, SFO completely removes the need to anneal the learning rate.
|
||||
|
||||
SFO supports the RAdam (`schedule_free_radam`), AdamW (`schedule_free_adamw`) and SGD (`schedule_free_sgd`) optimizers. The RAdam scheduler doesn't require `warmup_steps`.
|
||||
SFO supports the RAdam (`schedule_free_radam`), AdamW (`schedule_free_adamw`) and SGD (`schedule_free_sgd`) optimizers. The RAdam scheduler doesn't require `warmup_steps` or `warmup_ratio`.
|
||||
|
||||
By default, it is recommended to set `lr_scheduler_type="constant"`. Other `lr_scheduler_type` values may also work, but combining SFO optimizers with other learning rate schedules could affect SFOs intended behavior and performance.
|
||||
|
||||
|
||||
@ -149,7 +149,7 @@ The example below packs `up_proj` and `gate_proj` into a single `gate_up_proj` m
|
||||
```python
|
||||
class Llama4TextExperts(nn.Module):
|
||||
...
|
||||
self.gate_up_proj = nn.Parameter(torch.zeros(self.num_experts, self.hidden_size, 2 * self.expert_dim))
|
||||
self.gate_up_proj = nn.Parameter(torch.empty(self.num_experts, self.hidden_size, 2 * self.expert_dim))
|
||||
```
|
||||
|
||||
Batch matrix multiplication can be used in the `forward` pass to compute the output of the `gate_up_proj` module.
|
||||
|
||||
@ -38,7 +38,7 @@ pip install transformers[dev]
|
||||
or for an editable install:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
inside the Transformers repo. Since the number of optional dependencies of Transformers has grown a lot, it's possible you don't manage to get all of them. If the dev install fails, make sure to install PyTorch then do
|
||||
@ -50,7 +50,7 @@ pip install transformers[quality]
|
||||
or for an editable install:
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
pip install -e .[quality]
|
||||
```
|
||||
|
||||
## Tests
|
||||
|
||||
@ -40,7 +40,7 @@ You can choose between MXFP4 and NVFP4 with `FPQuantConfig(forward_dtype="mxfp4"
|
||||
|
||||
A **Blackwell-generation GPU is required** to run the kernels. Runtime support for FP-Quant is implemented through the [QuTLASS](https://github.com/IST-DASLab/qutlass) library and a lightweight PyTorch interface lib [`fp_quant`](https://github.com/IST-DASLab/FP-Quant/tree/master/inference_lib). We recommend installing the former **from source** and the latter with `pip install fp_quant`.
|
||||
|
||||
Users **without a Blackwell-generation GPU** , can use the method with `quantization_config=FPQuantConfig(pseudoquantization=True)` without having to install [QuTLASS](https://github.com/IST-DASLab/qutlass). This would provide no speedups but would fully emulate the effect of quantization.
|
||||
Users **without a Blackwell-generation GPU** , can use the method with `quantization_config=FPQuantConfig(pseudoquant=True)` without having to install [QuTLASS](https://github.com/IST-DASLab/qutlass). This would provide no speedups but would fully emulate the effect of quantization.
|
||||
|
||||
> [!TIP]
|
||||
> Find models pre-quantized with FP-Quant in the official ISTA-DASLab [collection](https://huggingface.co/collections/ISTA-DASLab/fp-quant-6877c186103a21d3a02568ee).
|
||||
|
||||
@ -383,30 +383,6 @@ transformers serve \
|
||||
--attn_implementation "sdpa"
|
||||
```
|
||||
|
||||
### Quantization
|
||||
|
||||
transformers serve is compatible with all [quantization methods](https://huggingface.co/docs/transformers/main/quantization/overview) supported in transformers. Quantization can significantly reduce memory usage and improve inference speed, with two main workflows: pre-quantized models and on-the-fly quantization.
|
||||
|
||||
#### Pre-quantized Models
|
||||
|
||||
For models that are already quantized (e.g., GPTQ, AWQ, bitsandbytes), simply choose a quantized model name for serving.
|
||||
Make sure to install the required libraries listed in the quantization documentation.
|
||||
|
||||
> [!TIP]
|
||||
> Pre-quantized models generally provide the best balance of performance and accuracy.
|
||||
|
||||
#### On the fly quantization
|
||||
|
||||
If you want to quantize a model at runtime, you can specify the --quantization flag in the CLI. Note that not all quantization methods support on-the-fly conversion. The full list of supported methods is available in the quantization [overview](https://huggingface.co/docs/transformers/main/quantization/overview).
|
||||
|
||||
Currently, with transformers serve, we only supports some methods: ["bnb-4bit", "bnb-8bit"]
|
||||
|
||||
For example, to enable 4-bit quantization with bitsandbytes, you need to pass add `--quantization bnb-4bit`:
|
||||
|
||||
```sh
|
||||
transformers serve --quantization bnb-4bit
|
||||
```
|
||||
|
||||
### Performance tips
|
||||
|
||||
- Use an efficient attention backend when available:
|
||||
@ -421,4 +397,6 @@ transformers serve \
|
||||
|
||||
- `--dtype {bfloat16|float16}` typically improve throughput and memory use vs. `float32`
|
||||
|
||||
- `--load_in_4bit`/`--load_in_8bit` can reduce memory footprint for LoRA setups
|
||||
|
||||
- `--force-model <repo_id>` avoids per-request model hints and helps produce stable, repeatable runs
|
||||
|
||||
@ -220,7 +220,7 @@ At this point, only three steps remain:
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=32,
|
||||
... num_train_epochs=10,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -211,7 +211,7 @@ At this point, only three steps remain:
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=3,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -378,7 +378,7 @@ Most of the training arguments are self-explanatory, but one that is quite impor
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=batch_size,
|
||||
... per_device_eval_batch_size=batch_size,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -187,7 +187,7 @@ from torch import nn
|
||||
from transformers import Trainer
|
||||
|
||||
class CustomTrainer(Trainer):
|
||||
def compute_loss(self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], return_outputs: bool = False, num_items_in_batch: Optional[torch.Tensor] = None):
|
||||
def compute_loss(self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], return_outputs: bool = False num_items_in_batch: Optional[torch.Tensor] = None):
|
||||
labels = inputs.pop("labels")
|
||||
# forward pass
|
||||
outputs = model(**inputs)
|
||||
|
||||
@ -37,7 +37,7 @@ pip install transformers[dev]
|
||||
o una instalación editable:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
del repositorio de Transformers.
|
||||
|
||||
@ -220,7 +220,7 @@ Al llegar a este punto, solo quedan tres pasos:
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=32,
|
||||
... num_train_epochs=10,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -170,7 +170,7 @@ Per quanto riguarda la classe `TrainingArguments`:
|
||||
- L'argomento `evaluate_during_training` di `TrainingArguments` è deprecato a favore di `eval_strategy`.
|
||||
|
||||
Per quanto riguarda il modello Transfo-XL:
|
||||
- L'attributo di configurazione `tie_weight` di Transfo-XL diventa `tie_word_embeddings`.
|
||||
- L'attributo di configurazione `tie_weight` di Transfo-XL diventa `tie_words_embeddings`.
|
||||
- Il metodo di modellazione `reset_length` di Transfo-XL diventa `reset_memory_length`.
|
||||
|
||||
Per quanto riguarda le pipeline:
|
||||
|
||||
@ -37,7 +37,7 @@ pip install transformers[dev]
|
||||
o un'installazione modificabile:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
all'interno del repo Transformers.
|
||||
|
||||
@ -200,6 +200,8 @@
|
||||
title: モデル
|
||||
- local: main_classes/text_generation
|
||||
title: テキストの生成
|
||||
- local: main_classes/onnx
|
||||
title: ONNX
|
||||
- local: main_classes/optimizer_schedules
|
||||
title: 最適化
|
||||
- local: main_classes/output
|
||||
|
||||
@ -406,16 +406,16 @@ model = BrandNewBertModel(BrandNewBertConfig())
|
||||
def _init_weights(self, module):
|
||||
"""Initialize the weights"""
|
||||
if isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
elif isinstance(module, nn.Embedding):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.padding_idx is not None:
|
||||
module.weight.data[module.padding_idx].zero_()
|
||||
elif isinstance(module, nn.LayerNorm):
|
||||
module.bias.zero_()
|
||||
module.weight.fill_(1.0)
|
||||
module.bias.data.zero_()
|
||||
module.weight.data.fill_(1.0)
|
||||
```
|
||||
|
||||
特定のモジュールに特別な初期化が必要な場合、カスタムスキームをさらに持つことができます。たとえば、
|
||||
@ -431,9 +431,9 @@ def _init_weights(self, module):
|
||||
module.project_hid._is_hf_initialized = True
|
||||
module.project_q._is_hf_initialized = True
|
||||
elif isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
```
|
||||
|
||||
`_is_hf_initialized`フラグは、サブモジュールを一度だけ初期化することを確実にするために内部で使用されます。
|
||||
|
||||
@ -1292,7 +1292,7 @@ DeepSpeed は、`LRRangeTest`、`OneCycle`、`WarmupLR`、および`WarmupDecayL
|
||||
したがって、スケジューラを設定しない場合、これがデフォルトで設定されるスケジューラになります。
|
||||
|
||||
設定ファイルで `scheduler` エントリを設定しない場合、[`Trainer`] は
|
||||
`--lr_scheduler_type`、`--learning_rate`、および `--warmup_steps` の値を設定します。
|
||||
`--lr_scheduler_type`、`--learning_rate`、および `--warmup_steps` または `--warmup_ratio` の値を設定します。
|
||||
🤗 それのトランスフォーマーバージョン。
|
||||
|
||||
以下は、`WarmupLR`の自動構成された`scheduler`エントリの例です。
|
||||
@ -1316,7 +1316,8 @@ DeepSpeed は、`LRRangeTest`、`OneCycle`、`WarmupLR`、および`WarmupDecayL
|
||||
|
||||
- `warmup_min_lr` の値は `0` です。
|
||||
- `warmup_max_lr` と `--learning_rate` の値。
|
||||
- `warmup_num_steps` と `--warmup_steps` の値 (指定されている場合)
|
||||
- `warmup_num_steps` と `--warmup_steps` の値 (指定されている場合)。それ以外の場合は `--warmup_ratio` を使用します
|
||||
トレーニング ステップの数を乗算し、切り上げます。
|
||||
- `total_num_steps` には `--max_steps` の値を指定するか、指定されていない場合は実行時に自動的に導出されます。
|
||||
環境、データセットのサイズ、およびその他のコマンド ライン引数 (
|
||||
`WarmupDecayLR`)。
|
||||
|
||||
@ -40,7 +40,7 @@ pip install transformers[dev]
|
||||
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
トランスフォーマーズのリポジトリ内で作業しています。トランスフォーマーズのオプションの依存関係の数が増えたため、すべてを取得できない可能性があります。開発用インストールが失敗した場合、作業しているディープラーニングフレームワーク(PyTorch、TensorFlow、および/またはFlax)をインストールし、次の手順を実行してください。
|
||||
@ -53,7 +53,7 @@ pip install transformers[quality]
|
||||
または編集可能なインストールの場合:
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
pip install -e .[quality]
|
||||
```
|
||||
|
||||
## Tests
|
||||
|
||||
@ -219,7 +219,7 @@ MInDS-14 データセットのサンプリング レートは 8khz です (こ
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=32,
|
||||
... num_train_epochs=10,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -216,7 +216,7 @@ Datasets、🤗 データセット ライブラリから Food-101 データセ
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=3,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -360,7 +360,7 @@ You should probably TRAIN this model on a down-stream task to be able to use it
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=batch_size,
|
||||
... per_device_eval_batch_size=batch_size,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -406,6 +406,8 @@
|
||||
title: Models
|
||||
- local: main_classes/text_generation
|
||||
title: 텍스트 생성
|
||||
- local: main_classes/onnx
|
||||
title: ONNX
|
||||
- local: main_classes/optimizer_schedules
|
||||
title: 최적화
|
||||
- local: main_classes/output
|
||||
|
||||
@ -348,16 +348,16 @@ model = BrandNewBertModel(BrandNewBertConfig())
|
||||
def _init_weights(self, module):
|
||||
"""Initialize the weights"""
|
||||
if isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
elif isinstance(module, nn.Embedding):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.padding_idx is not None:
|
||||
module.weight.data[module.padding_idx].zero_()
|
||||
elif isinstance(module, nn.LayerNorm):
|
||||
module.bias.zero_()
|
||||
module.weight.fill_(1.0)
|
||||
module.bias.data.zero_()
|
||||
module.weight.data.fill_(1.0)
|
||||
```
|
||||
|
||||
몇 가지 모듈에 대해 특별한 초기화가 필요한 경우 사용자 정의 방식을 사용할 수도 있습니다. 예를 들어, `Wav2Vec2ForPreTraining`에서 마지막 두 개의 선형 레이어는 일반적인 PyTorch `nn.Linear`의 초기화를 가져야 하지만, 다른 모든 레이어는 위와 같은 초기화를 사용해야 합니다. 이는 다음과 같이 코드화됩니다:
|
||||
@ -371,9 +371,9 @@ def _init_weights(self, module):
|
||||
module.project_hid._is_hf_initialized = True
|
||||
module.project_q._is_hf_initialized = True
|
||||
elif isinstance(module, nn.Linear):
|
||||
module.weight.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
|
||||
if module.bias is not None:
|
||||
module.bias.zero_()
|
||||
module.bias.data.zero_()
|
||||
```
|
||||
|
||||
`_is_hf_initialized` 플래그는 서브모듈을 한 번만 초기화하도록 내부적으로 사용됩니다. `module.project_q` 및 `module.project_hid`에 대해 `True`로 설정함으로써, 우리가 수행한 사용자 정의 초기화가 이후에 덮어쓰이지 않도록 합니다. 즉, `_init_weights` 함수가 이들에게 적용되지 않습니다.
|
||||
|
||||
@ -154,7 +154,7 @@ pip install schedulefree
|
||||
|
||||
[Schedule Free optimizer (SFO)](https://hf.co/papers/2405.15682)는 기본 옵티마이저의 모멘텀 대신 평균화(averaging)와 보간(interpolation)을 조합하여 사용합니다. 덕분에 기존의 학습률 스케줄러와 달리, SFO는 학습률을 점진적으로 낮추는 절차가 아예 필요 없습니다.
|
||||
|
||||
SFO는 RAdam(`schedule_free_radam`), AdamW(`schedule_free_adamw`), SGD(`schedule_free_sgd`) 옵티마이저를 지원합니다. RAdam 스케줄러는 `warmup_steps`.
|
||||
SFO는 RAdam(`schedule_free_radam`), AdamW(`schedule_free_adamw`), SGD(`schedule_free_sgd`) 옵티마이저를 지원합니다. RAdam 스케줄러는 `warmup_steps`나 `warmup_ratio` 설정이 필요하지 않습니다.
|
||||
|
||||
기본적으로 `lr_scheduler_type="constant"`로 설정하는 것을 권장합니다. 다른 `lr_scheduler_type` 값도 동작할 순 있으나, SFO 옵티마이저와 다른 학습률 스케줄을 함께 사용하면 SFO의 의도된 동작과 성능에 영향을 줄 수 있습니다.
|
||||
|
||||
|
||||
@ -152,7 +152,7 @@ class ParallelInterface(MutableMapping):
|
||||
```python
|
||||
class Llama4TextExperts(nn.Module):
|
||||
...
|
||||
self.gate_up_proj = nn.Parameter(torch.zeros(self.num_experts, self.hidden_size, 2 * self.expert_dim))
|
||||
self.gate_up_proj = nn.Parameter(torch.empty(self.num_experts, self.hidden_size, 2 * self.expert_dim))
|
||||
```
|
||||
|
||||
배치 행렬 곱셈을 `forward` 패스에서 사용하여 `gate_up_proj` 모듈의 출력을 계산할 수 있습니다.
|
||||
|
||||
@ -37,7 +37,7 @@ pip install transformers[dev]
|
||||
또는 Transformers 저장소 내에 편집 가능한 설치가 필요합니다:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
pip install -e .[dev]
|
||||
```
|
||||
|
||||
Transformers의 선택적 종속성 수가 많이 늘어났기 때문에 개발 설치를 실패할 수도 있습니다. 개발 설치가 실패하는 경우, 작업 중인 Deep Learning 프레임워크 (PyTorch, TensorFlow 및/또는 Flax)를 설치하고 다음 명령을 실행하세요.
|
||||
@ -49,7 +49,7 @@ pip install transformers[quality]
|
||||
편집 가능한 설치의 경우는 다음 명령을 실행하세요.
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
pip install -e .[quality]
|
||||
```
|
||||
|
||||
|
||||
|
||||
@ -221,7 +221,7 @@ MinDS-14 데이터 세트의 샘플링 속도는 8khz이므로(이 정보는 [
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=32,
|
||||
... num_train_epochs=10,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -212,7 +212,7 @@ Hugging Face 계정에 로그인하여 모델을 업로드하고 커뮤니티에
|
||||
... gradient_accumulation_steps=4,
|
||||
... per_device_eval_batch_size=16,
|
||||
... num_train_epochs=3,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -357,7 +357,7 @@ You should probably TRAIN this model on a down-stream task to be able to use it
|
||||
... learning_rate=5e-5,
|
||||
... per_device_train_batch_size=batch_size,
|
||||
... per_device_eval_batch_size=batch_size,
|
||||
... warmup_steps=0.1,
|
||||
... warmup_ratio=0.1,
|
||||
... logging_steps=10,
|
||||
... load_best_model_at_end=True,
|
||||
... metric_for_best_model="accuracy",
|
||||
|
||||
@ -107,6 +107,8 @@
|
||||
title: 模型
|
||||
- local: main_classes/text_generation
|
||||
title: 文本生成
|
||||
- local: main_classes/onnx
|
||||
title: ONNX
|
||||
- local: main_classes/optimizer_schedules
|
||||
title: Optimization
|
||||
- local: main_classes/output
|
||||
|
||||
@ -1206,7 +1206,7 @@ DeepSpeed支持`LRRangeTest`、`OneCycle`、`WarmupLR`和`WarmupDecayLR`学习
|
||||
- 通过 `--lr_scheduler_type constant_with_warmup` 实现 `WarmupLR`
|
||||
- 通过 `--lr_scheduler_type linear` 实现 `WarmupDecayLR`。这也是 `--lr_scheduler_type` 的默认值,因此,如果不配置调度器,这将是默认配置的调度器。
|
||||
|
||||
如果在配置文件中不配置 `scheduler` 条目,[`Trainer`] 将使用 `--lr_scheduler_type`、`--learning_rate` 和 `--warmup_steps` 的值来配置其🤗 Transformers 版本。
|
||||
如果在配置文件中不配置 `scheduler` 条目,[`Trainer`] 将使用 `--lr_scheduler_type`、`--learning_rate` 和 `--warmup_steps` 或 `--warmup_ratio` 的值来配置其🤗 Transformers 版本。
|
||||
|
||||
以下是 `WarmupLR` 的自动配置示例:
|
||||
|
||||
@ -1227,7 +1227,7 @@ DeepSpeed支持`LRRangeTest`、`OneCycle`、`WarmupLR`和`WarmupDecayLR`学习
|
||||
|
||||
- `warmup_min_lr` 的值为 `0`。
|
||||
- `warmup_max_lr` 的值为 `--learning_rate`。
|
||||
- `warmup_num_steps` 的值为 `--warmup_steps`(如果提供)。
|
||||
- `warmup_num_steps` 的值为 `--warmup_steps`(如果提供)。否则,将使用 `--warmup_ratio` 乘以训练步骤的数量,并四舍五入。
|
||||
- `total_num_steps` 的值为 `--max_steps` 或者如果没有提供,将在运行时根据环境、数据集的大小和其他命令行参数(对于 `WarmupDecayLR` 来说需要)自动推导。
|
||||
|
||||
当然,您可以接管任何或所有的配置值,并自行设置这些值:
|
||||
|
||||
21
examples/legacy/README.md
Normal file
21
examples/legacy/README.md
Normal file
@ -0,0 +1,21 @@
|
||||
<!---
|
||||
Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
# Legacy examples
|
||||
|
||||
This folder contains examples which are not actively maintained (mostly contributed by the community).
|
||||
|
||||
Using these examples together with a recent version of the library usually requires to make small (sometimes big) adaptations to get the scripts working.
|
||||
26
examples/legacy/benchmarking/README.md
Normal file
26
examples/legacy/benchmarking/README.md
Normal file
@ -0,0 +1,26 @@
|
||||
<!---
|
||||
Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
# 🤗 Benchmark results
|
||||
|
||||
Here, you can find a list of the different benchmark results created by the community.
|
||||
|
||||
If you would like to list benchmark results on your favorite models of the [model hub](https://huggingface.co/models) here, please open a Pull Request and add it below.
|
||||
|
||||
| Benchmark description | Results | Environment info | Author |
|
||||
|:----------|:-------------|:-------------|------:|
|
||||
| PyTorch Benchmark on inference for `google-bert/bert-base-cased` |[memory](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_memory.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) |
|
||||
| PyTorch Benchmark on inference for `google-bert/bert-base-cased` |[time](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/inference_time.csv) | [env](https://github.com/patrickvonplaten/files_to_link_to/blob/master/bert_benchmark/env.csv) | [Patrick von Platen](https://github.com/patrickvonplaten) |
|
||||
178
examples/legacy/benchmarking/plot_csv_file.py
Normal file
178
examples/legacy/benchmarking/plot_csv_file.py
Normal file
@ -0,0 +1,178 @@
|
||||
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import csv
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from matplotlib.ticker import ScalarFormatter
|
||||
|
||||
from transformers import HfArgumentParser
|
||||
|
||||
|
||||
def list_field(default=None, metadata=None):
|
||||
return field(default_factory=lambda: default, metadata=metadata)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PlotArguments:
|
||||
"""
|
||||
Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
|
||||
"""
|
||||
|
||||
csv_file: str = field(
|
||||
metadata={"help": "The csv file to plot."},
|
||||
)
|
||||
plot_along_batch: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Whether to plot along batch size or sequence length. Defaults to sequence length."},
|
||||
)
|
||||
is_time: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Whether the csv file has time results or memory results. Defaults to memory results."},
|
||||
)
|
||||
no_log_scale: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Disable logarithmic scale when plotting"},
|
||||
)
|
||||
is_train: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Whether the csv file has training results or inference results. Defaults to inference results."
|
||||
},
|
||||
)
|
||||
figure_png_file: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Filename under which the plot will be saved. If unused no plot is saved."},
|
||||
)
|
||||
short_model_names: Optional[list[str]] = list_field(
|
||||
default=None, metadata={"help": "List of model names that are used instead of the ones in the csv file."}
|
||||
)
|
||||
|
||||
|
||||
def can_convert_to_int(string):
|
||||
try:
|
||||
int(string)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def can_convert_to_float(string):
|
||||
try:
|
||||
float(string)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
class Plot:
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
self.result_dict = defaultdict(lambda: {"bsz": [], "seq_len": [], "result": {}})
|
||||
|
||||
with open(self.args.csv_file, newline="") as csv_file:
|
||||
reader = csv.DictReader(csv_file)
|
||||
for row in reader:
|
||||
model_name = row["model"]
|
||||
self.result_dict[model_name]["bsz"].append(int(row["batch_size"]))
|
||||
self.result_dict[model_name]["seq_len"].append(int(row["sequence_length"]))
|
||||
if can_convert_to_int(row["result"]):
|
||||
# value is not None
|
||||
self.result_dict[model_name]["result"][(int(row["batch_size"]), int(row["sequence_length"]))] = (
|
||||
int(row["result"])
|
||||
)
|
||||
elif can_convert_to_float(row["result"]):
|
||||
# value is not None
|
||||
self.result_dict[model_name]["result"][(int(row["batch_size"]), int(row["sequence_length"]))] = (
|
||||
float(row["result"])
|
||||
)
|
||||
|
||||
def plot(self):
|
||||
fig, ax = plt.subplots()
|
||||
title_str = "Time usage" if self.args.is_time else "Memory usage"
|
||||
title_str = title_str + " for training" if self.args.is_train else title_str + " for inference"
|
||||
|
||||
if not self.args.no_log_scale:
|
||||
# set logarithm scales
|
||||
ax.set_xscale("log")
|
||||
ax.set_yscale("log")
|
||||
|
||||
for axis in [ax.xaxis, ax.yaxis]:
|
||||
axis.set_major_formatter(ScalarFormatter())
|
||||
|
||||
for model_name_idx, model_name in enumerate(self.result_dict.keys()):
|
||||
batch_sizes = sorted(set(self.result_dict[model_name]["bsz"]))
|
||||
sequence_lengths = sorted(set(self.result_dict[model_name]["seq_len"]))
|
||||
results = self.result_dict[model_name]["result"]
|
||||
|
||||
(x_axis_array, inner_loop_array) = (
|
||||
(batch_sizes, sequence_lengths) if self.args.plot_along_batch else (sequence_lengths, batch_sizes)
|
||||
)
|
||||
|
||||
label_model_name = (
|
||||
model_name if self.args.short_model_names is None else self.args.short_model_names[model_name_idx]
|
||||
)
|
||||
|
||||
for inner_loop_value in inner_loop_array:
|
||||
if self.args.plot_along_batch:
|
||||
y_axis_array = np.asarray(
|
||||
[results[(x, inner_loop_value)] for x in x_axis_array if (x, inner_loop_value) in results],
|
||||
dtype=int,
|
||||
)
|
||||
else:
|
||||
y_axis_array = np.asarray(
|
||||
[results[(inner_loop_value, x)] for x in x_axis_array if (inner_loop_value, x) in results],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
(x_axis_label, inner_loop_label) = (
|
||||
("batch_size", "len") if self.args.plot_along_batch else ("in #tokens", "bsz")
|
||||
)
|
||||
|
||||
x_axis_array = np.asarray(x_axis_array, int)[: len(y_axis_array)]
|
||||
plt.scatter(
|
||||
x_axis_array, y_axis_array, label=f"{label_model_name} - {inner_loop_label}: {inner_loop_value}"
|
||||
)
|
||||
plt.plot(x_axis_array, y_axis_array, "--")
|
||||
|
||||
title_str += f" {label_model_name} vs."
|
||||
|
||||
title_str = title_str[:-4]
|
||||
y_axis_label = "Time in s" if self.args.is_time else "Memory in MB"
|
||||
|
||||
# plot
|
||||
plt.title(title_str)
|
||||
plt.xlabel(x_axis_label)
|
||||
plt.ylabel(y_axis_label)
|
||||
plt.legend()
|
||||
|
||||
if self.args.figure_png_file is not None:
|
||||
plt.savefig(self.args.figure_png_file)
|
||||
else:
|
||||
plt.show()
|
||||
|
||||
|
||||
def main():
|
||||
parser = HfArgumentParser(PlotArguments)
|
||||
plot_args = parser.parse_args_into_dataclasses()[0]
|
||||
plot = Plot(args=plot_args)
|
||||
plot.plot()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
1
examples/legacy/benchmarking/requirements.txt
Normal file
1
examples/legacy/benchmarking/requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
torch >= 1.3
|
||||
47
examples/legacy/benchmarking/run_benchmark.py
Normal file
47
examples/legacy/benchmarking/run_benchmark.py
Normal file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2020 The HuggingFace Inc. team.
|
||||
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Benchmarking the library on inference and training"""
|
||||
|
||||
from transformers import HfArgumentParser, PyTorchBenchmark, PyTorchBenchmarkArguments
|
||||
|
||||
|
||||
def main():
|
||||
parser = HfArgumentParser(PyTorchBenchmarkArguments)
|
||||
try:
|
||||
benchmark_args = parser.parse_args_into_dataclasses()[0]
|
||||
except ValueError as e:
|
||||
arg_error_msg = "Arg --no_{0} is no longer used, please use --no-{0} instead."
|
||||
begin_error_msg = " ".join(str(e).split(" ")[:-1])
|
||||
full_error_msg = ""
|
||||
depreciated_args = eval(str(e).split(" ")[-1])
|
||||
wrong_args = []
|
||||
for arg in depreciated_args:
|
||||
# arg[2:] removes '--'
|
||||
if arg[2:] in PyTorchBenchmarkArguments.deprecated_args:
|
||||
# arg[5:] removes '--no_'
|
||||
full_error_msg += arg_error_msg.format(arg[5:])
|
||||
else:
|
||||
wrong_args.append(arg)
|
||||
if len(wrong_args) > 0:
|
||||
full_error_msg = full_error_msg + begin_error_msg + str(wrong_args)
|
||||
raise ValueError(full_error_msg)
|
||||
|
||||
benchmark = PyTorchBenchmark(args=benchmark_args)
|
||||
benchmark.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
232
examples/legacy/multiple_choice/run_multiple_choice.py
Normal file
232
examples/legacy/multiple_choice/run_multiple_choice.py
Normal file
@ -0,0 +1,232 @@
|
||||
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
|
||||
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Finetuning the library models for multiple choice (Bert, Roberta, XLNet)."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
from utils_multiple_choice import MultipleChoiceDataset, Split, processors
|
||||
|
||||
import transformers
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoModelForMultipleChoice,
|
||||
AutoTokenizer,
|
||||
DataCollatorWithPadding,
|
||||
EvalPrediction,
|
||||
HfArgumentParser,
|
||||
Trainer,
|
||||
TrainingArguments,
|
||||
set_seed,
|
||||
)
|
||||
from transformers.trainer_utils import is_main_process
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def simple_accuracy(preds, labels):
|
||||
return (preds == labels).mean()
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelArguments:
|
||||
"""
|
||||
Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
|
||||
"""
|
||||
|
||||
model_name_or_path: str = field(
|
||||
metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
|
||||
)
|
||||
config_name: Optional[str] = field(
|
||||
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
|
||||
)
|
||||
tokenizer_name: Optional[str] = field(
|
||||
default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
|
||||
)
|
||||
cache_dir: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DataTrainingArguments:
|
||||
"""
|
||||
Arguments pertaining to what data we are going to input our model for training and eval.
|
||||
"""
|
||||
|
||||
task_name: str = field(metadata={"help": "The name of the task to train on: " + ", ".join(processors.keys())})
|
||||
data_dir: str = field(metadata={"help": "Should contain the data files for the task."})
|
||||
max_seq_length: int = field(
|
||||
default=128,
|
||||
metadata={
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated, sequences shorter will be padded."
|
||||
)
|
||||
},
|
||||
)
|
||||
overwrite_cache: bool = field(
|
||||
default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
# See all possible arguments in src/transformers/training_args.py
|
||||
# or by passing the --help flag to this script.
|
||||
# We now keep distinct sets of args, for a cleaner separation of concerns.
|
||||
|
||||
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
|
||||
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
|
||||
datefmt="%m/%d/%Y %H:%M:%S",
|
||||
level=logging.INFO if training_args.local_process_index in [-1, 0] else logging.WARN,
|
||||
)
|
||||
logger.warning(
|
||||
"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
|
||||
training_args.local_process_index,
|
||||
training_args.device,
|
||||
training_args.n_gpu,
|
||||
bool(training_args.parallel_mode.value == "distributed"),
|
||||
training_args.fp16,
|
||||
)
|
||||
# Set the verbosity to info of the Transformers logger (on main process only):
|
||||
if is_main_process(training_args.local_process_index):
|
||||
transformers.utils.logging.set_verbosity_info()
|
||||
transformers.utils.logging.enable_default_handler()
|
||||
transformers.utils.logging.enable_explicit_format()
|
||||
logger.info("Training/evaluation parameters %s", training_args)
|
||||
|
||||
# Set seed
|
||||
set_seed(training_args.seed)
|
||||
|
||||
try:
|
||||
processor = processors[data_args.task_name]()
|
||||
label_list = processor.get_labels()
|
||||
num_labels = len(label_list)
|
||||
except KeyError:
|
||||
raise ValueError("Task not found: %s" % (data_args.task_name))
|
||||
|
||||
# Load pretrained model and tokenizer
|
||||
#
|
||||
# Distributed training:
|
||||
# The .from_pretrained methods guarantee that only one local process can concurrently
|
||||
# download model & vocab.
|
||||
|
||||
config = AutoConfig.from_pretrained(
|
||||
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
|
||||
num_labels=num_labels,
|
||||
finetuning_task=data_args.task_name,
|
||||
cache_dir=model_args.cache_dir,
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
|
||||
cache_dir=model_args.cache_dir,
|
||||
)
|
||||
model = AutoModelForMultipleChoice.from_pretrained(
|
||||
model_args.model_name_or_path,
|
||||
from_tf=bool(".ckpt" in model_args.model_name_or_path),
|
||||
config=config,
|
||||
cache_dir=model_args.cache_dir,
|
||||
)
|
||||
|
||||
# Get datasets
|
||||
train_dataset = (
|
||||
MultipleChoiceDataset(
|
||||
data_dir=data_args.data_dir,
|
||||
tokenizer=tokenizer,
|
||||
task=data_args.task_name,
|
||||
max_seq_length=data_args.max_seq_length,
|
||||
overwrite_cache=data_args.overwrite_cache,
|
||||
mode=Split.train,
|
||||
)
|
||||
if training_args.do_train
|
||||
else None
|
||||
)
|
||||
eval_dataset = (
|
||||
MultipleChoiceDataset(
|
||||
data_dir=data_args.data_dir,
|
||||
tokenizer=tokenizer,
|
||||
task=data_args.task_name,
|
||||
max_seq_length=data_args.max_seq_length,
|
||||
overwrite_cache=data_args.overwrite_cache,
|
||||
mode=Split.dev,
|
||||
)
|
||||
if training_args.do_eval
|
||||
else None
|
||||
)
|
||||
|
||||
def compute_metrics(p: EvalPrediction) -> dict:
|
||||
preds = np.argmax(p.predictions, axis=1)
|
||||
return {"acc": simple_accuracy(preds, p.label_ids)}
|
||||
|
||||
# Data collator
|
||||
data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) if training_args.fp16 else None
|
||||
|
||||
# Initialize our Trainer
|
||||
trainer = Trainer(
|
||||
model=model,
|
||||
args=training_args,
|
||||
train_dataset=train_dataset,
|
||||
eval_dataset=eval_dataset,
|
||||
compute_metrics=compute_metrics,
|
||||
data_collator=data_collator,
|
||||
)
|
||||
|
||||
# Training
|
||||
if training_args.do_train:
|
||||
trainer.train(
|
||||
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
|
||||
)
|
||||
trainer.save_model()
|
||||
# For convenience, we also re-save the tokenizer to the same directory,
|
||||
# so that you can share your model easily on huggingface.co/models =)
|
||||
if trainer.is_world_master():
|
||||
tokenizer.save_pretrained(training_args.output_dir)
|
||||
|
||||
# Evaluation
|
||||
results = {}
|
||||
if training_args.do_eval:
|
||||
logger.info("*** Evaluate ***")
|
||||
|
||||
result = trainer.evaluate()
|
||||
|
||||
output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
|
||||
if trainer.is_world_master():
|
||||
with open(output_eval_file, "w") as writer:
|
||||
logger.info("***** Eval results *****")
|
||||
for key, value in result.items():
|
||||
logger.info(" %s = %s", key, value)
|
||||
writer.write("{} = {}\n".format(key, value))
|
||||
|
||||
results.update(result)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def _mp_fn(index):
|
||||
# For xla_spawn (TPUs)
|
||||
main()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
483
examples/legacy/multiple_choice/utils_multiple_choice.py
Normal file
483
examples/legacy/multiple_choice/utils_multiple_choice.py
Normal file
@ -0,0 +1,483 @@
|
||||
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
|
||||
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Multiple choice fine-tuning: utilities to work with multiple choice tasks of reading comprehension"""
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import tqdm
|
||||
from filelock import FileLock
|
||||
|
||||
from transformers import PreTrainedTokenizer, is_torch_available
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InputExample:
|
||||
"""
|
||||
A single training/test example for multiple choice
|
||||
|
||||
Args:
|
||||
example_id: Unique id for the example.
|
||||
question: string. The untokenized text of the second sequence (question).
|
||||
contexts: list of str. The untokenized text of the first sequence (context of corresponding question).
|
||||
endings: list of str. multiple choice's options. Its length must be equal to contexts' length.
|
||||
label: (Optional) string. The label of the example. This should be
|
||||
specified for train and dev examples, but not for test examples.
|
||||
"""
|
||||
|
||||
example_id: str
|
||||
question: str
|
||||
contexts: list[str]
|
||||
endings: list[str]
|
||||
label: Optional[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InputFeatures:
|
||||
"""
|
||||
A single set of features of data.
|
||||
Property names are the same names as the corresponding inputs to a model.
|
||||
"""
|
||||
|
||||
example_id: str
|
||||
input_ids: list[list[int]]
|
||||
attention_mask: Optional[list[list[int]]]
|
||||
token_type_ids: Optional[list[list[int]]]
|
||||
label: Optional[int]
|
||||
|
||||
|
||||
class Split(Enum):
|
||||
train = "train"
|
||||
dev = "dev"
|
||||
test = "test"
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
class MultipleChoiceDataset(Dataset):
|
||||
features: list[InputFeatures]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
data_dir: str,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
task: str,
|
||||
max_seq_length: Optional[int] = None,
|
||||
overwrite_cache=False,
|
||||
mode: Split = Split.train,
|
||||
):
|
||||
processor = processors[task]()
|
||||
|
||||
cached_features_file = os.path.join(
|
||||
data_dir,
|
||||
"cached_{}_{}_{}_{}".format(
|
||||
mode.value,
|
||||
tokenizer.__class__.__name__,
|
||||
str(max_seq_length),
|
||||
task,
|
||||
),
|
||||
)
|
||||
|
||||
# Make sure only the first process in distributed training processes the dataset,
|
||||
# and the others will use the cache.
|
||||
lock_path = cached_features_file + ".lock"
|
||||
with FileLock(lock_path):
|
||||
if os.path.exists(cached_features_file) and not overwrite_cache:
|
||||
logger.info(f"Loading features from cached file {cached_features_file}")
|
||||
self.features = torch.load(cached_features_file, weights_only=True)
|
||||
else:
|
||||
logger.info(f"Creating features from dataset file at {data_dir}")
|
||||
label_list = processor.get_labels()
|
||||
if mode == Split.dev:
|
||||
examples = processor.get_dev_examples(data_dir)
|
||||
elif mode == Split.test:
|
||||
examples = processor.get_test_examples(data_dir)
|
||||
else:
|
||||
examples = processor.get_train_examples(data_dir)
|
||||
logger.info("Training examples: %s", len(examples))
|
||||
self.features = convert_examples_to_features(
|
||||
examples,
|
||||
label_list,
|
||||
max_seq_length,
|
||||
tokenizer,
|
||||
)
|
||||
logger.info("Saving features into cached file %s", cached_features_file)
|
||||
torch.save(self.features, cached_features_file)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.features)
|
||||
|
||||
def __getitem__(self, i) -> InputFeatures:
|
||||
return self.features[i]
|
||||
|
||||
|
||||
class DataProcessor:
|
||||
"""Base class for data converters for multiple choice data sets."""
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""Gets a collection of `InputExample`s for the train set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""Gets a collection of `InputExample`s for the dev set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_test_examples(self, data_dir):
|
||||
"""Gets a collection of `InputExample`s for the test set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_labels(self):
|
||||
"""Gets the list of labels for this data set."""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RaceProcessor(DataProcessor):
|
||||
"""Processor for the RACE data set."""
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} train")
|
||||
high = os.path.join(data_dir, "train/high")
|
||||
middle = os.path.join(data_dir, "train/middle")
|
||||
high = self._read_txt(high)
|
||||
middle = self._read_txt(middle)
|
||||
return self._create_examples(high + middle, "train")
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} dev")
|
||||
high = os.path.join(data_dir, "dev/high")
|
||||
middle = os.path.join(data_dir, "dev/middle")
|
||||
high = self._read_txt(high)
|
||||
middle = self._read_txt(middle)
|
||||
return self._create_examples(high + middle, "dev")
|
||||
|
||||
def get_test_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} test")
|
||||
high = os.path.join(data_dir, "test/high")
|
||||
middle = os.path.join(data_dir, "test/middle")
|
||||
high = self._read_txt(high)
|
||||
middle = self._read_txt(middle)
|
||||
return self._create_examples(high + middle, "test")
|
||||
|
||||
def get_labels(self):
|
||||
"""See base class."""
|
||||
return ["0", "1", "2", "3"]
|
||||
|
||||
def _read_txt(self, input_dir):
|
||||
lines = []
|
||||
files = glob.glob(input_dir + "/*txt")
|
||||
for file in tqdm.tqdm(files, desc="read files"):
|
||||
with open(file, encoding="utf-8") as fin:
|
||||
data_raw = json.load(fin)
|
||||
data_raw["race_id"] = file
|
||||
lines.append(data_raw)
|
||||
return lines
|
||||
|
||||
def _create_examples(self, lines, set_type):
|
||||
"""Creates examples for the training and dev sets."""
|
||||
examples = []
|
||||
for _, data_raw in enumerate(lines):
|
||||
race_id = "{}-{}".format(set_type, data_raw["race_id"])
|
||||
article = data_raw["article"]
|
||||
for i in range(len(data_raw["answers"])):
|
||||
truth = str(ord(data_raw["answers"][i]) - ord("A"))
|
||||
question = data_raw["questions"][i]
|
||||
options = data_raw["options"][i]
|
||||
|
||||
examples.append(
|
||||
InputExample(
|
||||
example_id=race_id,
|
||||
question=question,
|
||||
contexts=[article, article, article, article], # this is not efficient but convenient
|
||||
endings=[options[0], options[1], options[2], options[3]],
|
||||
label=truth,
|
||||
)
|
||||
)
|
||||
return examples
|
||||
|
||||
|
||||
class SynonymProcessor(DataProcessor):
|
||||
"""Processor for the Synonym data set."""
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} train")
|
||||
return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train")
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} dev")
|
||||
return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev")
|
||||
|
||||
def get_test_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} dev")
|
||||
|
||||
return self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test")
|
||||
|
||||
def get_labels(self):
|
||||
"""See base class."""
|
||||
return ["0", "1", "2", "3", "4"]
|
||||
|
||||
def _read_csv(self, input_file):
|
||||
with open(input_file, encoding="utf-8") as f:
|
||||
return list(csv.reader(f))
|
||||
|
||||
def _create_examples(self, lines: list[list[str]], type: str):
|
||||
"""Creates examples for the training and dev sets."""
|
||||
|
||||
examples = [
|
||||
InputExample(
|
||||
example_id=line[0],
|
||||
question="", # in the swag dataset, the
|
||||
# common beginning of each
|
||||
# choice is stored in "sent2".
|
||||
contexts=[line[1], line[1], line[1], line[1], line[1]],
|
||||
endings=[line[2], line[3], line[4], line[5], line[6]],
|
||||
label=line[7],
|
||||
)
|
||||
for line in lines # we skip the line with the column names
|
||||
]
|
||||
|
||||
return examples
|
||||
|
||||
|
||||
class SwagProcessor(DataProcessor):
|
||||
"""Processor for the SWAG data set."""
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} train")
|
||||
return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train")
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} dev")
|
||||
return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev")
|
||||
|
||||
def get_test_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} dev")
|
||||
raise ValueError(
|
||||
"For swag testing, the input file does not contain a label column. It can not be tested in current code "
|
||||
"setting!"
|
||||
)
|
||||
return self._create_examples(self._read_csv(os.path.join(data_dir, "test.csv")), "test")
|
||||
|
||||
def get_labels(self):
|
||||
"""See base class."""
|
||||
return ["0", "1", "2", "3"]
|
||||
|
||||
def _read_csv(self, input_file):
|
||||
with open(input_file, encoding="utf-8") as f:
|
||||
return list(csv.reader(f))
|
||||
|
||||
def _create_examples(self, lines: list[list[str]], type: str):
|
||||
"""Creates examples for the training and dev sets."""
|
||||
if type == "train" and lines[0][-1] != "label":
|
||||
raise ValueError("For training, the input file must contain a label column.")
|
||||
|
||||
examples = [
|
||||
InputExample(
|
||||
example_id=line[2],
|
||||
question=line[5], # in the swag dataset, the
|
||||
# common beginning of each
|
||||
# choice is stored in "sent2".
|
||||
contexts=[line[4], line[4], line[4], line[4]],
|
||||
endings=[line[7], line[8], line[9], line[10]],
|
||||
label=line[11],
|
||||
)
|
||||
for line in lines[1:] # we skip the line with the column names
|
||||
]
|
||||
|
||||
return examples
|
||||
|
||||
|
||||
class ArcProcessor(DataProcessor):
|
||||
"""Processor for the ARC data set (request from allennlp)."""
|
||||
|
||||
def get_train_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} train")
|
||||
return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train")
|
||||
|
||||
def get_dev_examples(self, data_dir):
|
||||
"""See base class."""
|
||||
logger.info(f"LOOKING AT {data_dir} dev")
|
||||
return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev")
|
||||
|
||||
def get_test_examples(self, data_dir):
|
||||
logger.info(f"LOOKING AT {data_dir} test")
|
||||
return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test")
|
||||
|
||||
def get_labels(self):
|
||||
"""See base class."""
|
||||
return ["0", "1", "2", "3"]
|
||||
|
||||
def _read_json(self, input_file):
|
||||
with open(input_file, encoding="utf-8") as fin:
|
||||
lines = fin.readlines()
|
||||
return lines
|
||||
|
||||
def _create_examples(self, lines, type):
|
||||
"""Creates examples for the training and dev sets."""
|
||||
|
||||
# There are two types of labels. They should be normalized
|
||||
def normalize(truth):
|
||||
if truth in "ABCD":
|
||||
return ord(truth) - ord("A")
|
||||
elif truth in "1234":
|
||||
return int(truth) - 1
|
||||
else:
|
||||
logger.info("truth ERROR! %s", str(truth))
|
||||
return None
|
||||
|
||||
examples = []
|
||||
three_choice = 0
|
||||
four_choice = 0
|
||||
five_choice = 0
|
||||
other_choices = 0
|
||||
# we deleted example which has more than or less than four choices
|
||||
for line in tqdm.tqdm(lines, desc="read arc data"):
|
||||
data_raw = json.loads(line.strip("\n"))
|
||||
if len(data_raw["question"]["choices"]) == 3:
|
||||
three_choice += 1
|
||||
continue
|
||||
elif len(data_raw["question"]["choices"]) == 5:
|
||||
five_choice += 1
|
||||
continue
|
||||
elif len(data_raw["question"]["choices"]) != 4:
|
||||
other_choices += 1
|
||||
continue
|
||||
four_choice += 1
|
||||
truth = str(normalize(data_raw["answerKey"]))
|
||||
assert truth != "None"
|
||||
question_choices = data_raw["question"]
|
||||
question = question_choices["stem"]
|
||||
id = data_raw["id"]
|
||||
options = question_choices["choices"]
|
||||
if len(options) == 4:
|
||||
examples.append(
|
||||
InputExample(
|
||||
example_id=id,
|
||||
question=question,
|
||||
contexts=[
|
||||
options[0]["para"].replace("_", ""),
|
||||
options[1]["para"].replace("_", ""),
|
||||
options[2]["para"].replace("_", ""),
|
||||
options[3]["para"].replace("_", ""),
|
||||
],
|
||||
endings=[options[0]["text"], options[1]["text"], options[2]["text"], options[3]["text"]],
|
||||
label=truth,
|
||||
)
|
||||
)
|
||||
|
||||
if type == "train":
|
||||
assert len(examples) > 1
|
||||
assert examples[0].label is not None
|
||||
logger.info("len examples: %s}", str(len(examples)))
|
||||
logger.info("Three choices: %s", str(three_choice))
|
||||
logger.info("Five choices: %s", str(five_choice))
|
||||
logger.info("Other choices: %s", str(other_choices))
|
||||
logger.info("four choices: %s", str(four_choice))
|
||||
|
||||
return examples
|
||||
|
||||
|
||||
def convert_examples_to_features(
|
||||
examples: list[InputExample],
|
||||
label_list: list[str],
|
||||
max_length: int,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
) -> list[InputFeatures]:
|
||||
"""
|
||||
Loads a data file into a list of `InputFeatures`
|
||||
"""
|
||||
|
||||
label_map = {label: i for i, label in enumerate(label_list)}
|
||||
|
||||
features = []
|
||||
for ex_index, example in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
|
||||
if ex_index % 10000 == 0:
|
||||
logger.info("Writing example %d of %d" % (ex_index, len(examples)))
|
||||
choices_inputs = []
|
||||
for ending_idx, (context, ending) in enumerate(zip(example.contexts, example.endings)):
|
||||
text_a = context
|
||||
if example.question.find("_") != -1:
|
||||
# this is for cloze question
|
||||
text_b = example.question.replace("_", ending)
|
||||
else:
|
||||
text_b = example.question + " " + ending
|
||||
|
||||
inputs = tokenizer(
|
||||
text_a,
|
||||
text_b,
|
||||
add_special_tokens=True,
|
||||
max_length=max_length,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
return_overflowing_tokens=True,
|
||||
)
|
||||
if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
|
||||
logger.info(
|
||||
"Attention! you are cropping tokens (swag task is ok). "
|
||||
"If you are training ARC and RACE and you are popping question + options, "
|
||||
"you need to try to use a bigger max seq length!"
|
||||
)
|
||||
|
||||
choices_inputs.append(inputs)
|
||||
|
||||
label = label_map[example.label]
|
||||
|
||||
input_ids = [x["input_ids"] for x in choices_inputs]
|
||||
attention_mask = (
|
||||
[x["attention_mask"] for x in choices_inputs] if "attention_mask" in choices_inputs[0] else None
|
||||
)
|
||||
token_type_ids = (
|
||||
[x["token_type_ids"] for x in choices_inputs] if "token_type_ids" in choices_inputs[0] else None
|
||||
)
|
||||
|
||||
features.append(
|
||||
InputFeatures(
|
||||
example_id=example.example_id,
|
||||
input_ids=input_ids,
|
||||
attention_mask=attention_mask,
|
||||
token_type_ids=token_type_ids,
|
||||
label=label,
|
||||
)
|
||||
)
|
||||
|
||||
for f in features[:2]:
|
||||
logger.info("*** Example ***")
|
||||
logger.info("feature: %s" % f)
|
||||
|
||||
return features
|
||||
|
||||
|
||||
processors = {"race": RaceProcessor, "swag": SwagProcessor, "arc": ArcProcessor, "syn": SynonymProcessor}
|
||||
MULTIPLE_CHOICE_TASKS_NUM_LABELS = {"race", 4, "swag", 4, "arc", 4, "syn", 5}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user