Compare commits

..

7 Commits

87 changed files with 2261 additions and 1119 deletions

View File

@ -28,7 +28,7 @@ jobs:
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
container:
image: huggingface/transformers-all-latest-gpu
image: huggingface/transformers-pytorch-gpu
options: --gpus all --privileged --ipc host
steps:
- name: Get repo
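For reference, the `container:` stanza above amounts roughly to starting the job's container with the same flags; a sketch with the plain Docker CLI, using the new image name from this diff (the actual runner invocation passes additional arguments):

```bash
# Roughly how the benchmark job's container is launched by the runner (sketch only):
docker run --gpus all --privileged --ipc host \
  huggingface/transformers-pytorch-gpu
```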

View File

@ -9,7 +9,7 @@ jobs:
uses: ./.github/workflows/benchmark_v2.yml
with:
runner: aws-g5-4xlarge-cache-use1-public-80
container_image: huggingface/transformers-all-latest-gpu
container_image: huggingface/transformers-pytorch-gpu
container_options: --gpus all --privileged --ipc host --shm-size "16gb"
commit_sha: ${{ github.sha }}
run_id: ${{ github.run_id }}

View File

@ -45,52 +45,26 @@ jobs:
REF=main
push: true
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-all-latest-gpu docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
flash-attn-ci-image:
name: "PyTorch with Flash Attn [dev]"
runs-on:
group: aws-general-8-plus
steps:
# Push CI images still need to be re-built daily
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
name: Build and push (for Push CI) on a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-all-latest-gpu
build-args: |
REF=main
PYTORCH=2.8.0
TORCHCODEC=0.7.0
FLASH_ATTN=yes
push: true
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}:flash-attn
tags: huggingface/transformers-all-latest-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-all-latest-gpu docker build
title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
@ -130,8 +104,51 @@ jobs:
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
latest-torch-deepspeed-docker-for-push-ci-daily-build:
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) on a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
doc-builder:
name: "Doc builder"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
runs-on:
group: aws-general-8-plus
steps:
@ -164,6 +181,44 @@ jobs:
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-pytorch:
name: "Latest PyTorch [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-gpu
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the huggingface/transformers-pytorch-gpu docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]"
runs-on:
@ -190,47 +245,29 @@ jobs:
REF=main
push: true
tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) on a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-amd-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu build
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
cache-latest-pytorch-amd:
name: "Cache Latest Pytorch (AMD) Image"
needs: latest-pytorch-amd
runs-on:
group: amd-mi325-1gpu
steps:
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Pull and save docker image to cache
run: |
image="huggingface/transformers-pytorch-amd-gpu"
final_path="/mnt/image-cache/transformers-pytorch-amd-gpu.tar"
tmp_path="${final_path}.tmp"
echo "Pulling image: ${image}"
docker pull "${image}"
echo "Saving to temp file: ${tmp_path}"
docker save "${image}" -o "${tmp_path}"
echo "Moving to final path: ${final_path}"
mv -f "${tmp_path}" "${final_path}"
echo "Cache populated successfully at ${final_path}"
latest-pytorch-deepspeed-amd:
name: "PyTorch + DeepSpeed (AMD) [dev]"
runs-on:
@ -257,6 +294,19 @@ jobs:
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) on a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The latter case is useful for manual image building for debugging purposes. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
- name: Post to Slack
if: always()
@ -269,6 +319,8 @@ jobs:
latest-quantization-torch-docker:
name: "Latest Pytorch + Quantization [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
runs-on:
group: aws-general-8-plus
steps:
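The build jobs in this workflow all follow the same shape: check out the repo, log in to Docker Hub, then run `docker/build-push-action` against a context under `docker/`. A rough local equivalent for the `latest-pytorch` job above, sketched with the plain Docker CLI (assumes a transformers checkout and an active Docker Hub login):

```bash
# Build the image from the same context and build-arg as the latest-pytorch job,
# then push it under the same tag (sketch; the workflow uses docker/build-push-action@v5).
docker build --build-arg REF=main \
  -t huggingface/transformers-pytorch-gpu \
  ./docker/transformers-pytorch-gpu
docker push huggingface/transformers-pytorch-gpu
```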

View File

@ -28,9 +28,6 @@ on:
report_repo_id:
required: false
type: string
pytest_marker:
required: false
type: string
env:
HF_HOME: /mnt/cache
@ -140,7 +137,7 @@ jobs:
- name: Run all tests on GPU
working-directory: /transformers
run: |
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v -m '${{ inputs.pytest_marker }}' --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
ls -la
# Extract the exit code from the output file
EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
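The `script -q -c "…" test_outputs.txt` wrapper above runs pytest under a pseudo-terminal and logs all output to `test_outputs.txt`; recent util-linux versions of `script` append a trailer line containing `COMMAND_EXIT_CODE="N"`, which the `tail`/`grep`/`cut` pipeline recovers. A minimal sketch of the same pattern (the test path is illustrative):

```bash
# Run a command under `script`, capture its output, then recover its exit status
# from the typescript trailer written by util-linux `script`.
script -q -c "python3 -m pytest -rsfE -v tests/models/bert" test_outputs.txt
EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
echo "pytest exit code: ${EXIT_CODE:-unknown}"
exit "${EXIT_CODE:-1}"
```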

View File

@ -149,7 +149,7 @@ jobs:
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-push"
docker: huggingface/transformers-all-latest-gpu:flash-attn
docker: huggingface/transformers-all-latest-gpu
ci_event: push
report_repo_id: hf-internal-testing/transformers_ci_push
commit_sha: ${{ github.sha }}

View File

@ -0,0 +1,25 @@
name: Self-hosted runner (AMD mi210 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
run_amd_ci:
name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
secrets: inherit

View File

@ -0,0 +1,25 @@
name: Self-hosted runner (AMD mi250 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
run_amd_ci:
name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
secrets: inherit
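Both caller workflows above fire on a push to a branch whose name matches `run_amd_push_ci_caller*` that touches one of the watched paths, and then dispatch `self-push-amd.yml` with the corresponding `gpu_flavor`. A hypothetical way to exercise them (branch name and the edited file are illustrative):

```bash
# Push a branch matching run_amd_push_ci_caller* with a change under a watched path
# (src/, tests/, .github/, templates/ or utils/) to trigger the AMD push CI callers.
git checkout -b run_amd_push_ci_caller-smoke-test
echo "# trigger AMD push CI" >> utils/notification_service.py
git commit -am "Trigger AMD push CI"
git push origin run_amd_push_ci_caller-smoke-test
```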

.github/workflows/self-push-amd.yml (new file, 334 lines)
View File

@ -0,0 +1,334 @@
name: Self-hosted runner AMD GPU (push)
on:
workflow_call:
inputs:
gpu_flavor:
required: true
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 60
TF_FORCE_GPU_ALLOW_GROWTH: true
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
jobs:
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-22.04
steps:
- name: Checkout transformers
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
check_runners:
name: Check Runners
needs: check_runner_status
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
setup_gpu:
name: Setup
needs: check_runners
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }}
env:
# `CI_BRANCH_PUSH`: The branch name from the push event
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
# `CI_SHA_PUSH`: The commit SHA from the push event
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
# `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Update clone using environment variables
working-directory: /transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Fetch the tests to run
working-directory: /transformers
# TODO: add `git-python` in the docker images
run: |
pip install --upgrade git-python
python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
- name: Report fetched tests
uses: actions/upload-artifact@v4
with:
name: test_fetched
path: /transformers/test_preparation.txt
- id: set-matrix
name: Organize tests into models
working-directory: /transformers
# The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
# The `test_map` is used to get the actual identified test files under each key.
# If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
run: |
if [ -f test_map.json ]; then
keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
else
keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
fi
echo $keys
echo $test_map
echo "matrix=$keys" >> $GITHUB_OUTPUT
echo "test_map=$test_map" >> $GITHUB_OUTPUT
run_models_gpu:
name: Model tests
needs: setup_gpu
# `dummy` means there is no test to run
if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
machine_type: [single-gpu, multi-gpu]
runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
container:
image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Update clone using environment variables
working-directory: /transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all non-slow selected tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
check_runner_status,
check_runners,
setup_gpu,
run_models_gpu,
# run_tests_torch_cuda_extensions_single_gpu,
# run_tests_torch_cuda_extensions_multi_gpu
]
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Runner availability: ${{ needs.check_runner_status.result }}"
echo "Setup status: ${{ needs.setup_gpu.result }}"
echo "Runner status: ${{ needs.check_runners.result }}"
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- uses: actions/checkout@v4
# To avoid failure when multiple commits are merged into `main` in a short period of time.
# Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
with:
fetch-depth: 20
- name: Update clone using environment variables
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- uses: actions/download-artifact@v4
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
CI_SHA: ${{ env.CI_SHA }}
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
SETUP_STATUS: ${{ needs.setup_gpu.result }}
# We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"

.github/workflows/self-push-caller.yml (new file, 54 lines)
View File

@ -0,0 +1,54 @@
# Used to trigger self-push CI
name: Self-hosted runner (push-caller)
on:
push:
branches:
- main
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
check-for-setup:
runs-on: ubuntu-22.04
name: Check if setup was changed
outputs:
changed: ${{ steps.was_changed.outputs.changed }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "2"
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
- name: Was setup changed
id: was_changed
run: |
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
if [ `basename "${file}"` = "setup.py" ]; then
echo "changed=1" >> $GITHUB_OUTPUT
fi
done
build-docker-containers:
needs: check-for-setup
if: (github.event_name == 'push') && (needs.check-for-setup.outputs.changed == '1')
uses: ./.github/workflows/build-docker-images.yml
with:
image_postfix: "-push-ci"
secrets: inherit
run_push_ci:
name: Trigger Push CI
runs-on: ubuntu-22.04
if: ${{ always() }}
needs: build-docker-containers
steps:
- name: Trigger push CI via workflow_run
run: echo "Trigger push CI via workflow_run"

.github/workflows/self-push.yml (new file, 652 lines)
View File

@ -0,0 +1,652 @@
name: Self-hosted runner (push)
on:
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- ci_*
- ci-*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
repository_dispatch:
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 60
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
jobs:
setup:
name: Setup
strategy:
matrix:
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
test_map: ${{ steps.set-matrix.outputs.test_map }}
env:
# `CI_BRANCH_PUSH`: The branch name from the push event
# `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event
# `CI_SHA_PUSH`: The commit SHA from the push event
# `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty)
# `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty)
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Update clone using environment variables
working-directory: /transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Fetch the tests to run
working-directory: /transformers
# TODO: add `git-python` in the docker images
run: |
pip install --upgrade git-python
python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
- name: Report fetched tests
uses: actions/upload-artifact@v4
with:
name: test_fetched
path: /transformers/test_preparation.txt
- id: set-matrix
name: Organize tests into models
working-directory: /transformers
# The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc.
# The `test_map` is used to get the actual identified test files under each key.
# If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail)
run: |
if [ -f test_map.json ]; then
keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
else
keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
fi
echo $keys
echo $test_map
echo "matrix=$keys" >> $GITHUB_OUTPUT
echo "test_map=$test_map" >> $GITHUB_OUTPUT
run_tests_single_gpu:
name: Model tests
needs: setup
# `dummy` means there is no test to run
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [aws-g5-4xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all non-slow selected tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_multi_gpu:
name: Model tests
needs: setup
# `dummy` means there is no test to run
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
machine_type: [aws-g5-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all non-slow selected tests on GPU
env:
MKL_SERVICE_FORCE_INTEL: 1
working-directory: /transformers
run: |
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
run_tests_torch_cuda_extensions_single_gpu:
name: Torch CUDA extension tests
needs: setup
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-4xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /workspace/transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /workspace/transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /workspace/transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/
# To avoid unknown test failures
- name: Pre build DeepSpeed *again*
working-directory: /workspace
run: |
python3 -m pip uninstall -y deepspeed
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /workspace/transformers
run: |
python utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /workspace/transformers
run: pip freeze
- name: Run all non-slow selected tests on GPU
working-directory: /workspace/transformers
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
run: |
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
run_tests_torch_cuda_extensions_multi_gpu:
name: Torch CUDA extension tests
needs: setup
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
strategy:
fail-fast: false
matrix:
machine_type: [aws-g5-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- name: Set `machine_type` for report and artifact names
working-directory: /workspace/transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /workspace/transformers
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /workspace/transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Remove cached torch extensions
run: rm -rf /github/home/.cache/torch_extensions/
# To avoid unknown test failures
- name: Pre build DeepSpeed *again*
working-directory: /workspace
run: |
python3 -m pip uninstall -y deepspeed
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Environment
working-directory: /workspace/transformers
run: |
python utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /workspace/transformers
run: pip freeze
- name: Run all non-slow selected tests on GPU
working-directory: /workspace/transformers
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
run: |
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /workspace/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
needs: [
setup,
run_tests_single_gpu,
run_tests_multi_gpu,
run_tests_torch_cuda_extensions_single_gpu,
run_tests_torch_cuda_extensions_multi_gpu
]
env:
# For the meaning of these environment variables, see the job `Setup`
CI_BRANCH_PUSH: ${{ github.event.ref }}
CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
CI_SHA_PUSH: ${{ github.event.head_commit.id }}
CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
steps:
- name: Preliminary job status
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
echo "Setup status: ${{ needs.setup.result }}"
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
# We also take into account the `push` event (we might want to test some changes in a branch)
- name: Prepare custom environment variables
shell: bash
# For the meaning of these environment variables, see the job `Setup`
run: |
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
echo $CI_BRANCH_PUSH
echo $CI_BRANCH_WORKFLOW_RUN
echo $CI_SHA_PUSH
echo $CI_SHA_WORKFLOW_RUN
[[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV
[[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV
- name: print environment variables
run: |
echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}"
echo "env.CI_SHA = ${{ env.CI_SHA }}"
- uses: actions/checkout@v4
# To avoid failure when multiple commits are merged into `main` in a short period of time.
# Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ...
# (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit)
with:
fetch-depth: 20
- name: Update clone using environment variables
run: |
echo "original branch = $(git branch --show-current)"
git fetch && git checkout ${{ env.CI_BRANCH }}
echo "updated branch = $(git branch --show-current)"
git checkout ${{ env.CI_SHA }}
echo "log = $(git log -n 1)"
- uses: actions/download-artifact@v4
- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: push
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
CI_SHA: ${{ env.CI_SHA }}
SETUP_STATUS: ${{ needs.setup.result }}
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"

View File

@ -63,7 +63,7 @@ jobs:
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
docker: huggingface/transformers-all-latest-gpu
docker: huggingface/transformers-pytorch-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}

View File

@ -1,60 +0,0 @@
name: Nvidia CI - Flash Attn
on:
repository_dispatch:
schedule:
- cron: "17 2 * * *"
push:
branches:
- run_nvidia_ci_flash_attn*
workflow_dispatch:
inputs:
prev_workflow_run_id:
description: 'previous workflow run id to compare'
type: string
required: false
default: ""
other_workflow_run_id:
description: 'other workflow run id to compare'
type: string
required: false
default: ""
# Used for `push` to easily modify the target workflow runs to compare against
env:
prev_workflow_run_id: ""
other_workflow_run_id: ""
jobs:
setup:
name: Setup
runs-on: ubuntu-22.04
steps:
- name: Setup
run: |
mkdir "setup_values"
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: setup_values
path: setup_values
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-flash-attn"
docker: huggingface/transformers-all-latest-gpu:flash-attn
ci_event: Daily CI
runner_type: "a10"
report_repo_id: hf-internal-testing/transformers_flash_attn_ci
commit_sha: ${{ github.sha }}
pytest_marker: "flash_attn_test or flash_attn_3_test"
secrets: inherit

View File

@ -38,10 +38,6 @@ on:
default: ""
required: false
type: string
pytest_marker:
required: false
type: string
env:
HF_HOME: /mnt/cache
@ -131,7 +127,6 @@ jobs:
commit_sha: ${{ inputs.commit_sha || github.sha }}
runner_type: ${{ inputs.runner_type }}
report_repo_id: ${{ inputs.report_repo_id }}
pytest_marker: ${{ inputs.pytest_marker }}
secrets: inherit
run_trainer_and_fsdp_gpu:
@ -165,7 +160,7 @@ jobs:
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
image: huggingface/transformers-pytorch-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone

View File

@ -41,9 +41,9 @@ jobs:
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then
echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
echo "RUNNER=aws-g5-4xlarge-cache-ssh-use2" >> $GITHUB_ENV
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then
echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
echo "RUNNER=aws-g5-12xlarge-cache-ssh" >> $GITHUB_ENV
else
echo "RUNNER=" >> $GITHUB_ENV
fi
@ -106,7 +106,7 @@ jobs:
else
echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
fi
- name: Tailscale # In order to be able to SSH when a test fails
uses: huggingface/tailscale-action@main
with:
@ -115,3 +115,10 @@ jobs:
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true
sshTimeout: 15m
version: '1.90.3'
sha256sum: '96411140a11ccdfff6243b88e3f84692e1f176990050fb9f43a53970c0873f31'
- name: wait2
if: success() || failure()
shell: bash
run: sleep 15m

View File

@ -14,7 +14,7 @@ This AGENTS.md file provides guidance for code agents working with this codebase
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
- Code style is enforced in the CI. You can install the style tools with `pip install -e ".[quality]"`. You can then run `make fixup` to apply style and consistency fixes to your code.
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
## Copying and inheritance
@ -36,4 +36,4 @@ After making changes, you should usually run `make fixup` to ensure any copies a
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
In order to run tests, you may need to install dependencies. You can do this with `pip install -e ".[testing]"`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.

View File

@ -9,12 +9,6 @@ In this list, we showcase incredibly impactful and novel projects that have push
adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR
to add it.
## [◉ Universal Intelligence](https://github.com/blueraai/universal-intelligence)
[Universal Intelligence](https://github.com/blueraai/universal-intelligence) aims to standardize models, tools, and agents —transforming them into simple, composable, portable, interoperable, framework-agnostic, hardware-agnostic interfaces (through auto-negotiation and resource sharing); for fast and accessible development of AI applications.
Keywords: Protocol, Open-source, LLMs, Large Language Models, Agents, Low-code
## [gpt4all](https://github.com/nomic-ai/gpt4all)
[gpt4all](https://github.com/nomic-ai/gpt4all) is an ecosystem of open-source chatbots trained on massive collections of clean assistant data including code, stories and dialogue. It offers open-source, large language models such as LLaMA and GPT-J trained in an assistant-style.

View File

@ -9,15 +9,10 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).
ARG PYTORCH='2.9.0'
ARG PYTORCH='2.8.0'
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu126'
# This needs to be compatible with the above `PYTORCH`.
ARG TORCHCODEC='0.8.0'
ARG FLASH_ATTN='false'
RUN apt update
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
RUN git lfs install
@ -26,44 +21,11 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev]
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
# 2. For `torchcodec`, use `cpu` as we don't have `libnvcuvid.so` on the host runner. See https://github.com/meta-pytorch/torchcodec/issues/912
# **Important**: We need to specify `torchcodec` version if the torch version is not the latest stable one.
# 3. `set -e` means "exit immediately if any command fails".
RUN set -e; \
# Determine torch version
if [ ${#PYTORCH} -gt 0 ] && [ "$PYTORCH" != "pre" ]; then \
VERSION="torch==${PYTORCH}.*"; \
TORCHCODEC_VERSION="torchcodec==${TORCHCODEC}.*"; \
else \
VERSION="torch"; \
TORCHCODEC_VERSION="torchcodec"; \
fi; \
\
# Log the version being installed
echo "Installing torch version: $VERSION"; \
\
# Install PyTorch packages
if [ "$PYTORCH" != "pre" ]; then \
python3 -m pip install --no-cache-dir -U \
$VERSION \
torchvision \
torchaudio \
--extra-index-url https://download.pytorch.org/whl/$CUDA; \
# We need to specify the version if the torch version is not the latest stable one.
python3 -m pip install --no-cache-dir -U \
$TORCHCODEC_VERSION --extra-index-url https://download.pytorch.org/whl/cpu; \
else \
python3 -m pip install --no-cache-dir -U --pre \
torch \
torchvision \
torchaudio \
--extra-index-url https://download.pytorch.org/whl/nightly/$CUDA; \
python3 -m pip install --no-cache-dir -U --pre \
torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/cpu; \
fi
# 2. Regarding the `torch` part, we might need to specify proper versions for `torchvision` and `torchaudio`.
# For now, we don't pin their versions explicitly (so they are installed with their latest release versions).
# 3. For `torchcodec<0.8`: this was added quickly because torch 2.9.0 + torchcodec 0.8.0 fails in our CI environment. Remove it once they work together.
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio "torchcodec<0.8" --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
RUN python3 -m pip install --no-cache-dir -U timm
@ -92,7 +54,7 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
RUN python3 -m pip install --no-cache-dir quanto
# After using A10 as CI runner, let's run FA2 tests
RUN [ "$FLASH_ATTN" != "false" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"
# TODO (ydshieh): check this again
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
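For readers skimming the Dockerfile diff above, here is a small Python sketch of the version-selection logic in the `RUN` block: it only mirrors the shell conditionals (pin `torch`/`torchcodec` when `PYTORCH` is a concrete version, fall back to nightlies when it is `pre`) and is not part of the actual image build.

```python
# Sketch of the torch/torchcodec pinning logic from the Dockerfile RUN block above.
def torch_install_specs(pytorch: str, torchcodec: str, cuda: str):
    """Return (pip requirement specs, extra index URL) for the given build args."""
    if pytorch and pytorch != "pre":
        # Pin to the requested release line, e.g. "torch==2.8.0.*".
        specs = [f"torch=={pytorch}.*", "torchvision", "torchaudio", f"torchcodec=={torchcodec}.*"]
        index = f"https://download.pytorch.org/whl/{cuda}"
        # Note: in the Dockerfile, torchcodec is actually pulled from the cpu index
        # because the runner lacks libnvcuvid.so.
    else:
        # Nightly builds: install the latest pre-release of everything.
        specs = ["torch", "torchvision", "torchaudio", "torchcodec"]
        index = f"https://download.pytorch.org/whl/nightly/{cuda}"
    return specs, index

print(torch_install_specs("2.8.0", "0.7.0", "cu126"))
```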

View File

@ -38,7 +38,7 @@ pip install transformers[dev]
or for an editable install:
```bash
pip install -e ".[dev]"
pip install -e .[dev]
```
inside the Transformers repo. Since the number of optional dependencies of Transformers has grown a lot, you may not be able to install all of them. If the dev install fails, make sure to install PyTorch, then do
@ -50,7 +50,7 @@ pip install transformers[quality]
or for an editable install:
```bash
pip install -e ".[quality]"
pip install -e .[quality]
```
## Tests

View File

@ -37,7 +37,7 @@ pip install transformers[dev]
or an editable install:
```bash
pip install -e ".[dev]"
pip install -e .[dev]
```
inside the Transformers repo.

View File

@ -37,7 +37,7 @@ pip install transformers[dev]
or an editable install:
```bash
pip install -e ".[dev]"
pip install -e .[dev]
```
inside the Transformers repo.

View File

@ -40,7 +40,7 @@ pip install transformers[dev]
```bash
pip install -e ".[dev]"
pip install -e .[dev]
```
you are working inside the Transformers repository. Because the number of optional dependencies in Transformers has grown, you may not be able to install all of them. If the dev install fails, install the deep learning framework you are working with (PyTorch, TensorFlow, and/or Flax) and then follow these steps.
@ -53,7 +53,7 @@ pip install transformers[quality]
or for an editable install:
```bash
pip install -e ".[quality]"
pip install -e .[quality]
```
## Tests

View File

@ -37,7 +37,7 @@ pip install transformers[dev]
or you need an editable install inside the Transformers repository:
```bash
pip install -e ".[dev]"
pip install -e .[dev]
```
Because the number of optional dependencies in Transformers has grown a lot, the dev install may fail. If it does, install the deep learning framework you are working with (PyTorch, TensorFlow, and/or Flax) and run the following command.
@ -49,7 +49,7 @@ pip install transformers[quality]
For an editable install, run the following command.
```bash
pip install -e ".[quality]"
pip install -e .[quality]
```

View File

@ -308,19 +308,11 @@ def main():
api = HfApi()
repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
os.makedirs(args.output_dir, exist_ok=True)
gitignore_path = os.path.join(args.output_dir, ".gitignore")
content = ""
if os.path.exists(gitignore_path):
with open(gitignore_path, "r") as f:
content = f.read()
with open(gitignore_path, "a") as f:
if content and not content.endswith("\n"):
f.write("\n")
if "step_*" not in content:
f.write("step_*\n")
if "epoch_*" not in content:
f.write("epoch_*\n")
with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
if "step_*" not in gitignore:
gitignore.write("step_*\n")
if "epoch_*" not in gitignore:
gitignore.write("epoch_*\n")
elif args.output_dir is not None:
os.makedirs(args.output_dir, exist_ok=True)
accelerator.wait_for_everyone()
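The hunk above swaps a check against a freshly truncated (`w+`) file handle for a read-then-append pattern, so repeated runs do not wipe or duplicate the ignore entries. A self-contained sketch of that pattern (using a temporary directory instead of the script's real `args.output_dir`):

```python
# Sketch: append ignore patterns to .gitignore only when missing, without truncating the file.
import os
import tempfile

def ensure_gitignore_entries(output_dir, patterns=("step_*", "epoch_*")):
    path = os.path.join(output_dir, ".gitignore")
    content = ""
    if os.path.exists(path):
        with open(path, "r") as f:
            content = f.read()
    with open(path, "a") as f:                      # append mode preserves existing entries
        if content and not content.endswith("\n"):
            f.write("\n")
        for pattern in patterns:
            if pattern not in content:
                f.write(pattern + "\n")

with tempfile.TemporaryDirectory() as tmp:
    ensure_gitignore_entries(tmp)
    ensure_gitignore_entries(tmp)                   # second call is a no-op
    print(open(os.path.join(tmp, ".gitignore")).read())
```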

View File

@ -33,9 +33,9 @@ You can open any page of the documentation as a notebook in Colab (there is a bu
| [Quicktour of the library](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/quicktour.ipynb) | A presentation of the various APIs in Transformers |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/quicktour.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/en/transformers_doc/quicktour.ipynb)| |
| [Summary of the tasks](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/task_summary.ipynb) | How to run the models of the Transformers library task by task |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/task_summary.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/task_summary.ipynb)| |
| [Preprocessing data](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/preprocessing.ipynb) | How to use a tokenizer to preprocess your data |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/preprocessing.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/preprocessing.ipynb)||
| [Fine-tuning a pretrained model](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb) | How to use the Trainer to fine-tune a pretrained model |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)|
| [Summary of the tokenizers](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb) | The differences between the tokenizers algorithm |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb )|
| [Multilingual models](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb) | How to use the multilingual models of the library |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)|
| [Fine-tuning a pretrained model](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb) | How to use the Trainer to fine-tune a pretrained model |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/training.ipynb)| |
| [Summary of the tokenizers](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb) | The differences between the tokenizers algorithm |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/tokenizer_summary.ipynb )|
| [Multilingual models](https://github.com/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb) | How to use the multilingual models of the library |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg)](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/transformers_doc/en/multilingual.ipynb)|
### PyTorch Examples
@ -43,14 +43,14 @@ You can open any page of the documentation as a notebook in Colab (there is a bu
| Notebook | Description | | | |
|:----------|:-------------|:-------------|:-------------|------:|
| [Train your tokenizer](https://github.com/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb) | How to train and use your very own tokenizer |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)|
| [Train your language model](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb) | How to easily start using transformers |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)|
| [Train your tokenizer](https://github.com/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb) | How to train and use your very own tokenizer |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/tokenizer_training.ipynb)|
| [Train your language model](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb) | How to easily start using transformers |[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)|
| [How to fine-tune a model on text classification](https://github.com/huggingface/notebooks/blob/main/examples/text_classification.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on any GLUE task. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb)| |
| [How to fine-tune a model on language modeling](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a causal or masked LM task. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)|
| [How to fine-tune a model on token classification](https://github.com/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a token classification task (NER, PoS). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)|
| [How to fine-tune a model on question answering](https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SQUAD. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)|
| [How to fine-tune a model on multiple choice](https://github.com/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SWAG. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)|
| [How to fine-tune a model on translation](https://github.com/huggingface/notebooks/blob/main/examples/translation.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on WMT. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/translation.ipynb)|[![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/translation.ipynb)|
| [How to fine-tune a model on language modeling](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a causal or masked LM task. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)| [![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](http://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)|
| [How to fine-tune a model on token classification](https://github.com/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on a token classification task (NER, PoS). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)| |
| [How to fine-tune a model on question answering](https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SQUAD. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb)| |
| [How to fine-tune a model on multiple choice](https://github.com/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on SWAG. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)| |
| [How to fine-tune a model on translation](https://github.com/huggingface/notebooks/blob/main/examples/translation.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on WMT. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/translation.ipynb)| |
| [How to fine-tune a model on summarization](https://github.com/huggingface/notebooks/blob/main/examples/summarization.ipynb)| Show how to preprocess the data and fine-tune a pretrained model on XSUM. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| |
| [How to train a language model from scratch](https://github.com/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| Highlight all the steps to effectively train Transformer model on custom data | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| |
| [How to generate text](https://github.com/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| How to use different decoding methods for language generation with transformers | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| |
@ -58,16 +58,16 @@ You can open any page of the documentation as a notebook in Colab (there is a bu
#### Computer Vision[[pytorch-cv]]
| Notebook | Description | | | |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------|------:|
| [How to fine-tune a model on image classification (Torchvision)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification.ipynb) | Show how to preprocess the data using Torchvision and fine-tune any pretrained Vision model on Image Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)| [![Open in AMD Dev Cloud](https://oneclickamd.ai/static/amd.svg?v=2)](https://oneclickamd.ai/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)|
| [How to fine-tune a model on image classification (Albumentations)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) | Show how to preprocess the data using Albumentations and fine-tune any pretrained Vision model on Image Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb)| |
| [How to fine-tune a model on image classification (Kornia)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb) | Show how to preprocess the data using Kornia and fine-tune any pretrained Vision model on Image Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)| |
| [How to perform zero-shot object detection with OWL-ViT](https://github.com/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb) | Show how to perform zero-shot object detection on images with text queries | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| |
| [How to fine-tune an image captioning model](https://github.com/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb) | Show how to fine-tune BLIP for image captioning on a custom dataset | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)| |
| [How to build an image similarity system with Transformers](https://github.com/huggingface/notebooks/blob/main/examples/image_similarity.ipynb) | Show how to build an image similarity system | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)| |
| [How to fine-tune a SegFormer model on semantic segmentation](https://github.com/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb) | Show how to preprocess the data and fine-tune a pretrained SegFormer model on Semantic Segmentation | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)| |
| [How to fine-tune a VideoMAE model on video classification](https://github.com/huggingface/notebooks/blob/main/examples/video_classification.ipynb) | Show how to preprocess the data and fine-tune a pretrained VideoMAE model on Video Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb)| |
| Notebook | Description | | |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------:|
| [How to fine-tune a model on image classification (Torchvision)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification.ipynb) | Show how to preprocess the data using Torchvision and fine-tune any pretrained Vision model on Image Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb)|
| [How to fine-tune a model on image classification (Albumentations)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) | Show how to preprocess the data using Albumentations and fine-tune any pretrained Vision model on Image Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb)|
| [How to fine-tune a model on image classification (Kornia)](https://github.com/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb) | Show how to preprocess the data using Kornia and fine-tune any pretrained Vision model on Image Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb)|
| [How to perform zero-shot object detection with OWL-ViT](https://github.com/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb) | Show how to perform zero-shot object detection on images with text queries | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb)|
| [How to fine-tune an image captioning model](https://github.com/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb) | Show how to fine-tune BLIP for image captioning on a custom dataset | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_captioning_blip.ipynb)|
| [How to build an image similarity system with Transformers](https://github.com/huggingface/notebooks/blob/main/examples/image_similarity.ipynb) | Show how to build an image similarity system | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/image_similarity.ipynb)|
| [How to fine-tune a SegFormer model on semantic segmentation](https://github.com/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb) | Show how to preprocess the data and fine-tune a pretrained SegFormer model on Semantic Segmentation | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/semantic_segmentation.ipynb)|
| [How to fine-tune a VideoMAE model on video classification](https://github.com/huggingface/notebooks/blob/main/examples/video_classification.ipynb) | Show how to preprocess the data and fine-tune a pretrained VideoMAE model on Video Classification | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb) | [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/video_classification.ipynb)|
#### Audio[[pytorch-audio]]

View File

@ -104,7 +104,7 @@ _deps = [
"deepspeed>=0.9.3",
"diffusers",
"dill<0.3.5",
"evaluate>=0.4.6",
"evaluate>=0.2.0",
"faiss-cpu",
"fastapi",
"filelock",

View File

@ -14,7 +14,7 @@ deps = {
"deepspeed": "deepspeed>=0.9.3",
"diffusers": "diffusers",
"dill": "dill<0.3.5",
"evaluate": "evaluate>=0.4.6",
"evaluate": "evaluate>=0.2.0",
"faiss-cpu": "faiss-cpu",
"fastapi": "fastapi",
"filelock": "filelock",

View File

@ -314,14 +314,13 @@ def _load_state_dict_into_zero3_model(model_to_load, state_dict):
args = (state_dict, prefix, local_metadata, True, [], [], error_msgs)
# Parameters of module and children will start with prefix. We can exit early if there are none in this
# state_dict
if is_deepspeed_zero3_enabled():
if is_deepspeed_zero3_enabled() and len([key for key in state_dict if key.startswith(prefix)]) > 0:
import deepspeed
# In sharded models, each shard has only part of the full state_dict, so only gather
# parameters that are in the current state_dict.
named_parameters = dict(module.named_parameters(prefix=prefix[:-1], recurse=False))
params_to_gather = [named_parameters[k] for k in named_parameters if k in state_dict]
params_to_gather = [named_parameters[k] for k in state_dict if k in named_parameters]
if len(params_to_gather) > 0:
# because zero3 puts placeholders in model params, this context
# manager gathers (unpartitions) the params of the current layer, then loads from

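The change above iterates over the shard's `state_dict` keys rather than over all of the module's parameters, so only weights that are actually present get gathered under ZeRO-3. A rough sketch of that gathering pattern, assuming DeepSpeed is installed and the model was built under ZeRO-3 (simplified; not the library's full loader):

```python
# Rough sketch: gather only the ZeRO-3 partitioned parameters present in the current shard.
import deepspeed
import torch.nn as nn

def load_shard_into_zero3_module(module: nn.Module, state_dict: dict, prefix: str = "") -> None:
    named_parameters = dict(module.named_parameters(prefix=prefix[:-1] if prefix else "", recurse=False))
    # Iterate over the (usually smaller) shard keys instead of all module parameters.
    params_to_gather = [named_parameters[k] for k in state_dict if k in named_parameters]
    if params_to_gather:
        # Temporarily un-partition just these parameters, then copy the shard's weights in.
        with deepspeed.zero.GatheredParameters(params_to_gather, modifier_rank=0):
            module._load_from_state_dict(state_dict, prefix, {}, True, [], [], [])
```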
View File

@ -628,7 +628,7 @@ def maybe_load_adapters(
**adapter_kwargs,
):
if pretrained_model_name_or_path is None or not is_peft_available():
return None, pretrained_model_name_or_path, adapter_kwargs
return None, pretrained_model_name_or_path
token = download_kwargs.get("token")
@ -651,15 +651,13 @@ def maybe_load_adapters(
_adapter_model_path = adapter_kwargs.pop("_adapter_model_path", None)
token_from_adapter_kwargs = adapter_kwargs.pop("token", None)
if _adapter_model_path is None:
_adapter_model_path = find_adapter_config_file(
pretrained_model_name_or_path,
cache_dir=download_kwargs.get("cache_dir"),
force_download=bool(download_kwargs.get("force_download", False)),
proxies=download_kwargs.get("proxies"),
token=token or token_from_adapter_kwargs,
token=token,
revision=download_kwargs.get("revision"),
local_files_only=bool(download_kwargs.get("local_files_only", False)),
subfolder=download_kwargs.get("subfolder", ""),
@ -672,4 +670,4 @@ def maybe_load_adapters(
_adapter_model_path = pretrained_model_name_or_path
pretrained_model_name_or_path = json.load(f)["base_model_name_or_path"]
return _adapter_model_path, pretrained_model_name_or_path, adapter_kwargs
return _adapter_model_path, pretrained_model_name_or_path
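As a toy illustration of the fallback at the end of this hunk, where a local directory containing `adapter_config.json` is treated as a PEFT adapter and the base checkpoint is read from that file, consider the following sketch (names and control flow are illustrative, not the library's exact logic):

```python
# Toy sketch: resolve a local PEFT adapter directory to (adapter_path, base_model_path).
import json
import os

def resolve_adapter(pretrained_model_name_or_path: str):
    adapter_config = os.path.join(str(pretrained_model_name_or_path), "adapter_config.json")
    if os.path.isdir(str(pretrained_model_name_or_path)) and os.path.isfile(adapter_config):
        with open(adapter_config) as f:
            base_model = json.load(f)["base_model_name_or_path"]
        # The given path points at the adapter; the base model gets loaded instead.
        return pretrained_model_name_or_path, base_model
    return None, pretrained_model_name_or_path
```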

View File

@ -4353,7 +4353,7 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH
if adapter_kwargs is None:
adapter_kwargs = {}
_adapter_model_path, pretrained_model_name_or_path, adapter_kwargs = maybe_load_adapters(
_adapter_model_path, pretrained_model_name_or_path = maybe_load_adapters(
pretrained_model_name_or_path,
download_kwargs_with_commit,
**adapter_kwargs,

View File

@ -538,12 +538,12 @@ class BartEncoder(BartPreTrainedModel):
self.max_source_positions = config.max_position_embeddings
embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
self.embed_tokens = BartScaledWordEmbedding(
config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale
)
if embed_tokens is not None:
self.embed_tokens = embed_tokens
else:
self.embed_tokens = BartScaledWordEmbedding(
config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale
)
self.embed_tokens.weight = embed_tokens.weight
self.embed_positions = BartLearnedPositionalEmbedding(
config.max_position_embeddings,
@ -682,12 +682,12 @@ class BartDecoder(BartPreTrainedModel):
self.max_target_positions = config.max_position_embeddings
embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
self.embed_tokens = BartScaledWordEmbedding(
config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale
)
if embed_tokens is not None:
self.embed_tokens = embed_tokens
else:
self.embed_tokens = BartScaledWordEmbedding(
config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale
)
self.embed_tokens.weight = embed_tokens.weight
self.embed_positions = BartLearnedPositionalEmbedding(
config.max_position_embeddings,

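The two sides of these Bart hunks differ in how a passed-in embedding is reused: one shares the underlying weight tensor with a freshly built `BartScaledWordEmbedding`, the other reuses the module object outright. A minimal PyTorch sketch of both patterns, using plain `nn.Embedding` instead of Bart's classes:

```python
# Minimal sketch of the two embedding-sharing patterns in the hunks above.
import torch.nn as nn

shared = nn.Embedding(num_embeddings=1000, embedding_dim=16, padding_idx=1)

# Pattern A: build a new module but share the underlying weight Parameter.
own_module = nn.Embedding(1000, 16, padding_idx=1)
own_module.weight = shared.weight          # both modules now update the same tensor

# Pattern B: reuse the passed-in module object directly.
reused_module = shared

assert own_module.weight.data_ptr() == reused_module.weight.data_ptr()
```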
View File

@ -22,7 +22,7 @@ import torch
from torch import nn
from ...activations import ACT2FN
from ...masking_utils import create_causal_mask
from ...modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
@ -310,6 +310,7 @@ class CLIPAttention(nn.Module):
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""
@ -323,6 +324,15 @@ class CLIPAttention(nn.Module):
queries = queries.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2)
keys = keys.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2)
values = values.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2)
# CLIP text model uses both `causal_attention_mask` and `attention_mask`
# in case FA2 kernel is called, `is_causal` should be inferred from `causal_attention_mask`
if self.config._attn_implementation == "flash_attention_2":
self.is_causal = causal_attention_mask is not None
else:
if attention_mask is not None and causal_attention_mask is not None:
attention_mask = attention_mask + causal_attention_mask
elif causal_attention_mask is not None:
attention_mask = causal_attention_mask
attention_interface: Callable = eager_attention_forward
if self.config._attn_implementation != "eager":
@ -334,12 +344,13 @@ class CLIPAttention(nn.Module):
keys,
values,
attention_mask,
is_causal=self.is_causal,
scaling=self.scale,
dropout=0.0 if not self.training else self.dropout,
**kwargs,
)
attn_output = attn_output.reshape(batch_size, seq_length, -1).contiguous()
attn_output = attn_output.reshape(batch_size, seq_length, embed_dim).contiguous()
attn_output = self.out_proj(attn_output)
return attn_output, attn_weights
@ -373,14 +384,16 @@ class CLIPEncoderLayer(GradientCheckpointingLayer):
self,
hidden_states: torch.Tensor,
attention_mask: torch.Tensor,
causal_attention_mask: torch.Tensor,
**kwargs: Unpack[TransformersKwargs],
) -> torch.FloatTensor:
residual = hidden_states
hidden_states = self.layer_norm1(hidden_states)
hidden_states, _ = self.self_attn(
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
causal_attention_mask=causal_attention_mask,
**kwargs,
)
hidden_states = residual + hidden_states
@ -484,6 +497,7 @@ class CLIPEncoder(nn.Module):
self,
inputs_embeds,
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
) -> BaseModelOutput:
r"""
@ -498,6 +512,13 @@ class CLIPEncoder(nn.Module):
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
causal_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Causal mask for the text model. Mask values selected in `[0, 1]`:
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
"""
hidden_states = inputs_embeds
@ -505,6 +526,7 @@ class CLIPEncoder(nn.Module):
hidden_states = encoder_layer(
hidden_states,
attention_mask,
causal_attention_mask,
**kwargs,
)
@ -541,19 +563,17 @@ class CLIPTextTransformer(nn.Module):
hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
attention_mask = create_causal_mask(
config=self.config,
input_embeds=hidden_states,
attention_mask=attention_mask,
cache_position=torch.arange(hidden_states.shape[1], device=hidden_states.device),
past_key_values=None,
causal_attention_mask = _create_4d_causal_attention_mask(
input_shape, hidden_states.dtype, device=hidden_states.device
)
kwargs.pop("is_causal", None)
if attention_mask is not None and self.config._attn_implementation != "flash_attention_2":
attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)
encoder_outputs: BaseModelOutput = self.encoder(
inputs_embeds=hidden_states,
attention_mask=attention_mask,
is_causal=True,
causal_attention_mask=causal_attention_mask,
**kwargs,
)
@ -598,6 +618,7 @@ class CLIPTextModel(CLIPPreTrainedModel):
input_modalities = "text"
_no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer"]
_supports_flash_attn = False # mask creation only accounts for sdpa/eager
def __init__(self, config: CLIPTextConfig):
super().__init__(config)
@ -611,7 +632,8 @@ class CLIPTextModel(CLIPPreTrainedModel):
def set_input_embeddings(self, value):
self.text_model.embeddings.token_embedding = value
@check_model_inputs(tie_last_hidden_states=False)
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,
@ -704,6 +726,7 @@ class CLIPVisionModel(CLIPPreTrainedModel):
return self.vision_model.embeddings.patch_embedding
@check_model_inputs(tie_last_hidden_states=False)
@can_return_tuple
@auto_docstring
def forward(
self,
@ -743,6 +766,7 @@ class CLIPVisionModel(CLIPPreTrainedModel):
class CLIPModel(CLIPPreTrainedModel):
config: CLIPConfig
_no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer", "CLIPVisionEmbeddings"]
_supports_flash_attn = False # mask creation only accounts for sdpa/eager
def __init__(self, config: CLIPConfig):
super().__init__(config)
@ -942,6 +966,7 @@ class CLIPTextModelWithProjection(CLIPPreTrainedModel):
config: CLIPTextConfig
input_modalities = "text"
_supports_flash_attn = False
_no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer"]
def __init__(self, config: CLIPTextConfig):
@ -961,7 +986,8 @@ class CLIPTextModelWithProjection(CLIPPreTrainedModel):
def set_input_embeddings(self, value):
self.text_model.embeddings.token_embedding = value
@check_model_inputs(tie_last_hidden_states=False)
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,
@ -1023,6 +1049,7 @@ class CLIPVisionModelWithProjection(CLIPPreTrainedModel):
return self.vision_model.embeddings.patch_embedding
@check_model_inputs(tie_last_hidden_states=False)
@can_return_tuple
@auto_docstring
def forward(
self,
@ -1090,7 +1117,8 @@ class CLIPForImageClassification(CLIPPreTrainedModel):
# Initialize weights and apply final processing
self.post_init()
@check_model_inputs(tie_last_hidden_states=False)
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,

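The eager/SDPA path in the CLIP hunk above adds the padding mask and the causal mask together as 4-D additive float masks. A simplified sketch of that combination (my own helper, not the library's `_prepare_4d_attention_mask` or `create_causal_mask`):

```python
# Simplified sketch: combine a 2-D padding mask with a causal mask into one additive 4-D mask.
import torch

def additive_masks(padding_mask: torch.Tensor, dtype=torch.float32) -> torch.Tensor:
    _, seq_len = padding_mask.shape
    min_value = torch.finfo(dtype).min

    # Causal part: block attention to future positions.
    causal = torch.triu(torch.full((seq_len, seq_len), min_value, dtype=dtype), diagonal=1)
    causal = causal[None, None, :, :]                       # [1, 1, tgt, src]

    # Padding part: [bsz, src] -> [bsz, 1, 1, src], 0 entries become a large negative bias.
    padding = (1.0 - padding_mask[:, None, None, :].to(dtype)) * min_value

    return torch.clamp(causal + padding, min=min_value)     # broadcasts to [bsz, 1, tgt, src]

print(additive_masks(torch.tensor([[1, 1, 1, 0]])).shape)   # torch.Size([1, 1, 4, 4])
```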
View File

@ -1392,7 +1392,7 @@ class Emu3Model(Emu3PreTrainedModel):
image_features = torch.split(image_features, split_sizes)
return image_features
@torch.no_grad()
@torch.no_grad
def decode_image_tokens(self, image_tokens: torch.LongTensor, height: int, width: int):
"""
Decodes generated image tokens from language model to continuous pixel values

View File

@ -946,7 +946,7 @@ class Emu3Model(Emu3PreTrainedModel):
image_features = torch.split(image_features, split_sizes)
return image_features
@torch.no_grad()
@torch.no_grad
def decode_image_tokens(self, image_tokens: torch.LongTensor, height: int, width: int):
"""
Decodes generated image tokens from language model to continuous pixel values

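The only change in both Emu3 hunks is the spelling of the decorator. In recent PyTorch releases `torch.no_grad` can be used as a decorator with or without parentheses; whether the bare form is available depends on the PyTorch version, so treat the equivalence below as an assumption rather than a guarantee.

```python
# Sketch: both decorator spellings disable gradient tracking (bare form is version-dependent).
import torch

@torch.no_grad()
def double_with_parens(x):
    return x * 2

@torch.no_grad
def double_without_parens(x):
    return x * 2

x = torch.ones(3, requires_grad=True)
print(double_with_parens(x).requires_grad, double_without_parens(x).requires_grad)  # False False
```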
View File

@ -473,9 +473,6 @@ def convert_florence2_checkpoint(hf_model_id, pytorch_dump_folder, output_hub_pa
vision_config = convert_config(hf_config.vision_config.__dict__)
text_config = hf_config.text_config.__dict__
if text_config.get("model_type") == "florence2_language":
text_config["model_type"] = "bart"
config = Florence2Config(
text_config=text_config,
vision_config=vision_config,

View File

@ -156,7 +156,7 @@ class Gemma3TextConfig(PreTrainedConfig):
layer_types: Optional[list[str]] = None,
final_logit_softcapping: Optional[float] = None,
attn_logit_softcapping: Optional[float] = None,
rope_parameters: Optional[RopeParameters | dict[str, RopeParameters]] = None,
rope_parameters: Optional[RopeParameters | dict[RopeParameters]] = None,
use_bidirectional_attention: Optional[bool] = False,
**kwargs,
):
@ -186,16 +186,10 @@ class Gemma3TextConfig(PreTrainedConfig):
self.final_logit_softcapping = final_logit_softcapping
self.attn_logit_softcapping = attn_logit_softcapping
self.layer_types = layer_types
# Try to set `rope_scaling` if available, otherwise use `rope_parameters`
if (rope_scaling := kwargs.pop("rope_scaling", None)) is not None:
if rope_parameters is None:
rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling}
elif "full_attention" in rope_parameters:
rope_parameters["full_attention"].update(rope_scaling)
else:
rope_parameters.update(rope_scaling)
rope_scaling = kwargs.pop("rope_scaling", None)
if rope_scaling is not None:
rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling}
self.rope_parameters = rope_parameters
self.use_bidirectional_attention = use_bidirectional_attention
if use_bidirectional_attention:

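Both sides of this Gemma3 hunk translate the legacy `rope_scaling` kwarg into the newer per-layer-type `rope_parameters` mapping; the more defensive variant also merges into an existing dict instead of overwriting it. Here is a plain-dict sketch of that logic, decoupled from the actual config class:

```python
# Plain-dict sketch of mapping a legacy `rope_scaling` kwarg onto `rope_parameters`.
def resolve_rope_parameters(rope_parameters=None, rope_scaling=None):
    if rope_scaling is not None:
        if rope_parameters is None:
            rope_parameters = {
                "sliding_attention": {"rope_type": "default"},
                "full_attention": rope_scaling,
            }
        elif "full_attention" in rope_parameters:
            rope_parameters["full_attention"].update(rope_scaling)   # merge, don't overwrite
        else:
            rope_parameters.update(rope_scaling)
    return rope_parameters

print(resolve_rope_parameters(rope_scaling={"rope_type": "linear", "factor": 8.0}))
```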
View File

@ -191,10 +191,7 @@ _VARIANTS = {
num_hidden_layers=34,
num_key_value_heads=4,
sliding_window=1024,
rope_parameters={
"full_attention": {"rope_type": "linear", "factor": 8.0},
"sliding_attention": {"rope_type": "default"},
},
rope_parameters={"rope_type": "linear", "factor": 8.0}, # used for global RoPE only
rope_theta=1_000_000,
rope_local_base_freq=10_000,
attn_logit_softcapping=None,
@ -212,10 +209,7 @@ _VARIANTS = {
num_hidden_layers=48,
num_key_value_heads=8,
sliding_window=1024,
rope_parameters={
"full_attention": {"rope_type": "linear", "factor": 8.0},
"sliding_attention": {"rope_type": "default"},
},
rope_parameters={"rope_type": "linear", "factor": 8.0}, # used for global RoPE only
rope_theta=1_000_000,
rope_local_base_freq=10_000,
attn_logit_softcapping=None,
@ -233,10 +227,7 @@ _VARIANTS = {
num_key_value_heads=16,
head_dim=128,
sliding_window=1024,
rope_parameters={
"full_attention": {"rope_type": "linear", "factor": 8.0},
"sliding_attention": {"rope_type": "default"},
},
rope_parameters={"rope_type": "linear", "factor": 8.0}, # used for global RoPE only
rope_theta=1_000_000,
rope_local_base_freq=10_000,
attn_logit_softcapping=None,

View File

@ -171,7 +171,7 @@ class Gemma3TextConfig(Gemma2Config, PreTrainedConfig):
layer_types: Optional[list[str]] = None,
final_logit_softcapping: Optional[float] = None,
attn_logit_softcapping: Optional[float] = None,
rope_parameters: Optional[RopeParameters | dict[str, RopeParameters]] = None,
rope_parameters: Optional[RopeParameters | dict[RopeParameters]] = None,
use_bidirectional_attention: Optional[bool] = False,
**kwargs,
):
@ -201,16 +201,10 @@ class Gemma3TextConfig(Gemma2Config, PreTrainedConfig):
self.final_logit_softcapping = final_logit_softcapping
self.attn_logit_softcapping = attn_logit_softcapping
self.layer_types = layer_types
# Try to set `rope_scaling` if available, otherwise use `rope_parameters`
if (rope_scaling := kwargs.pop("rope_scaling", None)) is not None:
if rope_parameters is None:
rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling}
elif "full_attention" in rope_parameters:
rope_parameters["full_attention"].update(rope_scaling)
else:
rope_parameters.update(rope_scaling)
rope_scaling = kwargs.pop("rope_scaling", None)
if rope_scaling is not None:
rope_parameters = {"sliding_attention": {"rope_type": "default"}, "full_attention": rope_scaling}
self.rope_parameters = rope_parameters
self.use_bidirectional_attention = use_bidirectional_attention
if use_bidirectional_attention:

View File

@ -1283,7 +1283,7 @@ class JanusForConditionalGeneration(JanusPreTrainedModel, GenerationMixin):
decoded_image = decoded_image.permute(0, 2, 3, 1)
return decoded_image
@torch.no_grad()
@torch.no_grad
def generate(
self,
inputs: Optional[torch.Tensor] = None,

View File

@ -1099,7 +1099,7 @@ class JanusForConditionalGeneration(JanusPreTrainedModel, GenerationMixin):
decoded_image = decoded_image.permute(0, 2, 3, 1)
return decoded_image
@torch.no_grad()
@torch.no_grad
def generate(
self,
inputs: Optional[torch.Tensor] = None,

View File

@ -209,8 +209,8 @@ class Lfm2VlImageProcessorFast(BaseImageProcessorFast):
do_normalize = True
do_pad = True
return_row_col_info = False
image_mean = IMAGENET_STANDARD_MEAN
image_std = IMAGENET_STANDARD_STD
image_mean = IMAGENET_STANDARD_STD
image_std = IMAGENET_STANDARD_MEAN
valid_kwargs = Lfm2VlImageProcessorKwargs
model_input_names = ["pixel_values", "pixel_attention_mask", "spatial_shapes"]
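The only change here is which constant feeds `image_mean` and which feeds `image_std`. Normalization computes `(pixel - mean) / std`, so the assignment matters in general; if I recall correctly both ImageNet "standard" constants are `[0.5, 0.5, 0.5]` in this library, in which case the swap fixes the naming rather than the numbers, but treat that as an assumption. A small sketch of the normalization step:

```python
# Sketch of the per-channel normalization an image processor applies: (pixel - mean) / std.
import torch

def normalize(pixels: torch.Tensor, mean, std) -> torch.Tensor:
    mean = torch.tensor(mean).view(-1, 1, 1)
    std = torch.tensor(std).view(-1, 1, 1)
    return (pixels - mean) / std

image = torch.rand(3, 4, 4)                                  # C x H x W, values in [0, 1]
out = normalize(image, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
print(out.min().item() >= -1.0 and out.max().item() <= 1.0)  # True: output lands in [-1, 1]
```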

View File

@ -12,7 +12,7 @@ import torch
from torch import nn
from ...activations import ACT2FN
from ...masking_utils import create_causal_mask
from ...modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling, ImageClassifierOutput
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
@ -200,6 +200,7 @@ class MetaClip2Attention(nn.Module):
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""
@ -213,6 +214,15 @@ class MetaClip2Attention(nn.Module):
queries = queries.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2)
keys = keys.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2)
values = values.view(batch_size, seq_length, -1, self.head_dim).transpose(1, 2)
# METACLIP_2 text model uses both `causal_attention_mask` and `attention_mask`
# in case FA2 kernel is called, `is_causal` should be inferred from `causal_attention_mask`
if self.config._attn_implementation == "flash_attention_2":
self.is_causal = causal_attention_mask is not None
else:
if attention_mask is not None and causal_attention_mask is not None:
attention_mask = attention_mask + causal_attention_mask
elif causal_attention_mask is not None:
attention_mask = causal_attention_mask
attention_interface: Callable = eager_attention_forward
if self.config._attn_implementation != "eager":
@ -224,12 +234,13 @@ class MetaClip2Attention(nn.Module):
keys,
values,
attention_mask,
is_causal=self.is_causal,
scaling=self.scale,
dropout=0.0 if not self.training else self.dropout,
**kwargs,
)
attn_output = attn_output.reshape(batch_size, seq_length, -1).contiguous()
attn_output = attn_output.reshape(batch_size, seq_length, embed_dim).contiguous()
attn_output = self.out_proj(attn_output)
return attn_output, attn_weights
@ -263,14 +274,16 @@ class MetaClip2EncoderLayer(GradientCheckpointingLayer):
self,
hidden_states: torch.Tensor,
attention_mask: torch.Tensor,
causal_attention_mask: torch.Tensor,
**kwargs: Unpack[TransformersKwargs],
) -> torch.FloatTensor:
residual = hidden_states
hidden_states = self.layer_norm1(hidden_states)
hidden_states, _ = self.self_attn(
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
causal_attention_mask=causal_attention_mask,
**kwargs,
)
hidden_states = residual + hidden_states
@ -374,6 +387,7 @@ class MetaClip2Encoder(nn.Module):
self,
inputs_embeds,
attention_mask: Optional[torch.Tensor] = None,
causal_attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
) -> BaseModelOutput:
r"""
@ -388,6 +402,13 @@ class MetaClip2Encoder(nn.Module):
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
causal_attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Causal mask for the text model. Mask values selected in `[0, 1]`:
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
"""
hidden_states = inputs_embeds
@ -395,6 +416,7 @@ class MetaClip2Encoder(nn.Module):
hidden_states = encoder_layer(
hidden_states,
attention_mask,
causal_attention_mask,
**kwargs,
)
@ -415,12 +437,14 @@ class MetaClip2TextTransformer(nn.Module):
# For `pooled_output` computation
self.eos_token_id = config.eos_token_id
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
def forward(
self,
input_ids,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
use_cache: Optional[bool] = None,
**kwargs: Unpack[TransformersKwargs],
) -> BaseModelOutputWithPooling:
input_shape = input_ids.size()
@ -428,19 +452,21 @@ class MetaClip2TextTransformer(nn.Module):
hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
attention_mask = create_causal_mask(
config=self.config,
input_embeds=hidden_states,
attention_mask=attention_mask,
cache_position=torch.arange(hidden_states.shape[1], device=hidden_states.device),
past_key_values=None,
# CLIP's text model uses causal mask, prepare it here.
# https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
causal_attention_mask = _create_4d_causal_attention_mask(
input_shape, hidden_states.dtype, device=hidden_states.device
)
kwargs.pop("is_causal", None)
# expand attention_mask
if attention_mask is not None and self.config._attn_implementation != "flash_attention_2":
# [batch_size, seq_len] -> [batch_size, 1, tgt_seq_len, src_seq_len]
attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)
encoder_outputs: BaseModelOutput = self.encoder(
inputs_embeds=hidden_states,
attention_mask=attention_mask,
is_causal=True,
causal_attention_mask=causal_attention_mask,
**kwargs,
)
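
A minimal sketch of the CLIP-style mask helpers this hunk goes back to. The imports match the ones restored in this file; the toy sizes, CPU device, and the final combination step are assumptions for illustration only:

```python
import torch
from transformers.modeling_attn_mask_utils import (
    _create_4d_causal_attention_mask,
    _prepare_4d_attention_mask,
)

batch_size, seq_len, dtype = 2, 5, torch.float32
padding_mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])  # 1 = real token, 0 = padding

causal_mask = _create_4d_causal_attention_mask((batch_size, seq_len), dtype, device="cpu")
padding_4d = _prepare_4d_attention_mask(padding_mask, dtype)  # [bsz, 1, tgt_len, src_len]

print(causal_mask.shape, padding_4d.shape)  # both [2, 1, 5, 5] additive masks (0 = attend)
combined = causal_mask + padding_4d          # additive masks can simply be summed before
                                             # being added to the attention scores
```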
@ -501,6 +527,7 @@ class MetaClip2TextModel(MetaClip2PreTrainedModel):
input_modalities = "text"
_no_split_modules = ["MetaClip2TextEmbeddings", "MetaClip2EncoderLayer"]
_supports_flash_attn = False # mask creation only accounts for sdpa/eager
def __init__(self, config: MetaClip2TextConfig):
super().__init__(config)
@ -514,13 +541,16 @@ class MetaClip2TextModel(MetaClip2PreTrainedModel):
def set_input_embeddings(self, value):
self.text_model.embeddings.token_embedding = value
@check_model_inputs(tie_last_hidden_states=False)
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,
input_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
**kwargs: Unpack[TransformersKwargs],
) -> BaseModelOutputWithPooling:
r"""
@ -600,6 +630,7 @@ class MetaClip2TextModelWithProjection(MetaClip2PreTrainedModel):
config: MetaClip2TextConfig
input_modalities = "text"
_supports_flash_attn = False
_no_split_modules = ["MetaClip2TextEmbeddings", "MetaClip2EncoderLayer"]
def __init__(self, config: MetaClip2TextConfig):
@ -619,13 +650,16 @@ class MetaClip2TextModelWithProjection(MetaClip2PreTrainedModel):
def set_input_embeddings(self, value):
self.text_model.embeddings.token_embedding = value
@check_model_inputs(tie_last_hidden_states=False)
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,
input_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
**kwargs: Unpack[TransformersKwargs],
) -> MetaClip2TextModelOutput:
r"""
@ -758,6 +792,7 @@ class MetaClip2Model(MetaClip2PreTrainedModel):
config: MetaClip2Config
_no_split_modules = ["MetaClip2TextEmbeddings", "MetaClip2EncoderLayer", "MetaClip2VisionEmbeddings"]
_supports_flash_attn = False # mask creation only accounts for sdpa/eager
def __init__(self, config: MetaClip2Config):
super().__init__(config)
@ -1043,7 +1078,7 @@ class MetaClip2VisionModel(MetaClip2PreTrainedModel):
return self.vision_model.embeddings.patch_embedding
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
@can_return_tuple
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
@ -1152,6 +1187,7 @@ class MetaClip2VisionModelWithProjection(MetaClip2PreTrainedModel):
return self.vision_model.embeddings.patch_embedding
@check_model_inputs(tie_last_hidden_states=False)
@can_return_tuple
@auto_docstring
def forward(
self,
@ -1218,7 +1254,8 @@ class MetaClip2ForImageClassification(MetaClip2PreTrainedModel):
# Initialize weights and apply final processing
self.post_init()
@check_model_inputs(tie_last_hidden_states=False)
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,

View File

@ -3,8 +3,9 @@ from typing import Optional
import torch
from torch import nn
from ...masking_utils import create_causal_mask
from ...modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
from ...modeling_utils import PreTrainedModel
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
from ...utils.generic import check_model_inputs
@ -12,9 +13,9 @@ from ..clip.configuration_clip import CLIPConfig, CLIPTextConfig, CLIPVisionConf
from ..clip.modeling_clip import (
CLIPMLP,
CLIPAttention,
CLIPEncoderLayer,
CLIPForImageClassification,
CLIPModel,
CLIPPreTrainedModel,
CLIPTextEmbeddings,
CLIPTextModel,
CLIPTextModelWithProjection,
@ -213,9 +214,24 @@ class MetaClip2MLP(CLIPMLP):
pass
class MetaClip2EncoderLayer(CLIPEncoderLayer):
pass
@auto_docstring
class MetaClip2PreTrainedModel(CLIPPreTrainedModel):
class MetaClip2PreTrainedModel(PreTrainedModel):
config: MetaClip2Config
base_model_prefix = "metaclip_2"
input_modalities = ["image", "text"]
supports_gradient_checkpointing = True
_supports_sdpa = True
_supports_flash_attn = True
_supports_flex_attn = True
_supports_attention_backend = True
_can_record_outputs = {
"hidden_states": MetaClip2EncoderLayer,
"attentions": MetaClip2Attention,
}
def _init_weights(self, module):
"""Initialize the weights"""
@ -275,12 +291,14 @@ class MetaClip2PreTrainedModel(CLIPPreTrainedModel):
class MetaClip2TextTransformer(CLIPTextTransformer):
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
def forward(
self,
input_ids,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
use_cache: Optional[bool] = None,
**kwargs: Unpack[TransformersKwargs],
) -> BaseModelOutputWithPooling:
input_shape = input_ids.size()
@ -288,19 +306,21 @@ class MetaClip2TextTransformer(CLIPTextTransformer):
hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
attention_mask = create_causal_mask(
config=self.config,
input_embeds=hidden_states,
attention_mask=attention_mask,
cache_position=torch.arange(hidden_states.shape[1], device=hidden_states.device),
past_key_values=None,
# CLIP's text model uses causal mask, prepare it here.
# https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
causal_attention_mask = _create_4d_causal_attention_mask(
input_shape, hidden_states.dtype, device=hidden_states.device
)
kwargs.pop("is_causal", None)
# expand attention_mask
if attention_mask is not None and self.config._attn_implementation != "flash_attention_2":
# [batch_size, seq_len] -> [batch_size, 1, tgt_seq_len, src_seq_len]
attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)
encoder_outputs: BaseModelOutput = self.encoder(
inputs_embeds=hidden_states,
attention_mask=attention_mask,
is_causal=True,
causal_attention_mask=causal_attention_mask,
**kwargs,
)
@ -352,13 +372,22 @@ class MetaClip2TextModel(CLIPTextModel):
>>> pooled_output = outputs.pooler_output # pooled (EOS token) states
```"""
@check_model_inputs(tie_last_hidden_states=False)
def __init__(self, config: MetaClip2TextConfig):
super().__init__(config)
self.text_model = MetaClip2TextTransformer(config)
# Initialize weights and apply final processing
self.post_init()
@check_model_inputs()
@can_return_tuple
@auto_docstring
def forward(
self,
input_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
**kwargs: Unpack[TransformersKwargs],
):
r"""
@ -380,6 +409,8 @@ class MetaClip2TextModel(CLIPTextModel):
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
**kwargs,
)
@ -415,13 +446,24 @@ class MetaClip2TextModelWithProjection(CLIPTextModelWithProjection):
>>> text_embeds = outputs.text_embeds
```"""
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
def __init__(self, config: MetaClip2TextConfig):
super().__init__(config)
text_model = MetaClip2TextModel._from_config(config)
self.text_model = text_model.text_model
self.text_projection = nn.Linear(config.hidden_size, config.projection_dim, bias=False)
# Initialize weights and apply final processing
self.post_init()
def forward(
self,
input_ids: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
**kwargs: Unpack[TransformersKwargs],
):
r"""
@ -442,6 +484,8 @@ class MetaClip2TextModelWithProjection(CLIPTextModelWithProjection):
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
**kwargs,
)
@ -506,8 +550,6 @@ class MetaClip2Model(CLIPModel):
# Initialize weights and apply final processing
self.post_init()
@can_return_tuple
@auto_docstring
def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
@ -652,7 +694,7 @@ class MetaClip2VisionModel(CLIPVisionModel):
```"""
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
@can_return_tuple
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
@ -722,8 +764,6 @@ class MetaClip2VisionModelWithProjection(CLIPVisionModelWithProjection):
>>> image_embeds = outputs.image_embeds
```"""
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,

View File

@ -25,12 +25,12 @@ import torch
import torch.nn as nn
from ...activations import ACT2FN
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, torch_int
from ...utils.generic import check_model_inputs
from .configuration_mlcd import MLCDVisionConfig
@ -259,7 +259,7 @@ class MLCDAttention(nn.Module):
hidden_states: torch.Tensor,
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
**kwargs: Unpack[FlashAttentionKwargs],
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
"""Input shape: Batch x Time x Channel"""
batch_size, seq_length = hidden_states.shape[:-1]
@ -316,7 +316,7 @@ class MLCDEncoderLayer(GradientCheckpointingLayer):
hidden_states: torch.Tensor,
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
output_attentions: Optional[bool] = False,
) -> tuple[torch.FloatTensor]:
"""
Args:
@ -328,15 +328,18 @@ class MLCDEncoderLayer(GradientCheckpointingLayer):
Represents absolute positional embeddings for the query and key in the attention mechanism.
attention_mask (`torch.FloatTensor`):
Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values.
output_attentions (`bool`, *optional*, defaults to `False`):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
residual = hidden_states
hidden_states = self.layer_norm1(hidden_states)
hidden_states, _ = self.self_attn(
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
position_embeddings=position_embeddings,
attention_mask=attention_mask,
**kwargs,
output_attentions=output_attentions,
)
hidden_states = residual + hidden_states
@ -345,7 +348,12 @@ class MLCDEncoderLayer(GradientCheckpointingLayer):
hidden_states = self.mlp(hidden_states)
hidden_states = residual + hidden_states
return hidden_states
outputs = (hidden_states,)
if output_attentions:
outputs += (attn_weights,)
return outputs
class MLCDEncoder(nn.Module):
@ -369,7 +377,9 @@ class MLCDEncoder(nn.Module):
inputs_embeds: torch.FloatTensor,
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, BaseModelOutput]:
r"""
Args:
@ -385,18 +395,114 @@ class MLCDEncoder(nn.Module):
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
encoder_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
hidden_states = inputs_embeds
for encoder_layer in self.layers:
hidden_states = encoder_layer(
hidden_states,
position_embeddings,
attention_mask,
**kwargs,
for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
layer_outputs = encoder_layer(
hidden_states=hidden_states,
position_embeddings=position_embeddings,
attention_mask=attention_mask,
output_attentions=output_attentions,
)
hidden_states = layer_outputs[0]
if output_attentions:
all_attentions = all_attentions + (layer_outputs[1],)
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
if not return_dict:
return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None)
return BaseModelOutput(
last_hidden_state=hidden_states,
hidden_states=encoder_states,
attentions=all_attentions,
)
class MLCDVisionTransformer(nn.Module):
def __init__(self, config: MLCDVisionConfig):
super().__init__()
self.config = config
embed_dim = config.hidden_size
self.embeddings = MLCDVisionEmbeddings(config)
self.pre_layrnorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps)
self.encoder = MLCDEncoder(config)
self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps)
self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2)
self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2))
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, BaseModelOutputWithPooling]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
num_patches_height = pixel_values.shape[-2] // self.config.patch_size
num_patches_width = pixel_values.shape[-1] // self.config.patch_size
rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width)
rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device)
rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
position_embeddings = (emb.cos(), emb.sin())
hidden_states = self.embeddings(pixel_values)
hidden_states = self.pre_layrnorm(hidden_states)
encoder_outputs = self.encoder(
inputs_embeds=hidden_states,
position_embeddings=position_embeddings,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
last_hidden_state = encoder_outputs[0]
pooled_output = last_hidden_state[:, 0, :]
pooled_output = self.post_layernorm(pooled_output)
if not return_dict:
return (last_hidden_state, pooled_output) + encoder_outputs[1:]
return BaseModelOutputWithPooling(
last_hidden_state=last_hidden_state,
pooler_output=pooled_output,
hidden_states=encoder_outputs.hidden_states,
attentions=encoder_outputs.attentions,
)
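
A self-contained distillation (toy layers, arbitrary sizes, not MLCD itself) of the collection pattern the encoder loop above returns to: hidden states are recorded before each layer plus once at the end, attention weights once per layer:

```python
import torch

def toy_layer(hidden_states, output_attentions=False):
    # Stand-in for MLCDEncoderLayer: returns (hidden_states,) or (hidden_states, attn_weights)
    attn_weights = torch.softmax(hidden_states @ hidden_states.transpose(-1, -2), dim=-1)
    hidden_states = hidden_states + attn_weights @ hidden_states
    return (hidden_states, attn_weights) if output_attentions else (hidden_states,)

hidden_states = torch.randn(2, 4, 8)
encoder_states, all_attentions = (), ()
for layer in (toy_layer, toy_layer, toy_layer):
    encoder_states = encoder_states + (hidden_states,)
    layer_outputs = layer(hidden_states, output_attentions=True)
    hidden_states = layer_outputs[0]
    all_attentions = all_attentions + (layer_outputs[1],)
encoder_states = encoder_states + (hidden_states,)

print(len(encoder_states), len(all_attentions))  # num_layers + 1, num_layers
```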
@ -405,15 +511,8 @@ class MLCDPreTrainedModel(PreTrainedModel):
config: MLCDVisionConfig
base_model_prefix = "mlcd"
supports_gradient_checkpointing = True
accepts_loss_kwargs = False
_supports_flash_attn = True
_supports_sdpa = True
_supports_flex_attn = True
_supports_attention_backend = True
_can_record_outputs = {
"hidden_states": MLCDEncoderLayer,
"attentions": MLCDAttention,
}
def _init_weights(self, module):
"""Initialize the weights"""
@ -447,55 +546,6 @@ class MLCDPreTrainedModel(PreTrainedModel):
module.bias.data.zero_()
class MLCDVisionTransformer(nn.Module):
def __init__(self, config: MLCDVisionConfig):
super().__init__()
self.config = config
embed_dim = config.hidden_size
self.embeddings = MLCDVisionEmbeddings(config)
self.pre_layrnorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps)
self.encoder = MLCDEncoder(config)
self.post_layernorm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps)
self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2)
self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2))
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
**kwargs: Unpack[TransformersKwargs],
) -> Union[tuple, BaseModelOutputWithPooling]:
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
num_patches_height = pixel_values.shape[-2] // self.config.patch_size
num_patches_width = pixel_values.shape[-1] // self.config.patch_size
rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width)
rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device)
rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
position_embeddings = (emb.cos(), emb.sin())
hidden_states = self.embeddings(pixel_values)
hidden_states = self.pre_layrnorm(hidden_states)
encoder_outputs = self.encoder(
inputs_embeds=hidden_states,
position_embeddings=position_embeddings,
**kwargs,
)
last_hidden_state = encoder_outputs[0]
pooled_output = last_hidden_state[:, 0, :]
pooled_output = self.post_layernorm(pooled_output)
return BaseModelOutputWithPooling(
last_hidden_state=last_hidden_state,
pooler_output=pooled_output,
)
@auto_docstring(
custom_intro="""
The vision model from M_L_C_D without any head or projection on top.
@ -516,12 +566,13 @@ class MLCDVisionModel(MLCDPreTrainedModel):
def get_input_embeddings(self) -> nn.Module:
return self.vision_model.embeddings.patch_embedding
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
**kwargs: Unpack[TransformersKwargs],
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, BaseModelOutputWithPooling]:
r"""
Example:
@ -545,9 +596,17 @@ class MLCDVisionModel(MLCDPreTrainedModel):
>>> print(f"Number of attention layers: {len(outputs.attentions)}")
>>> print(f"Attention shape: {outputs.attentions[0].shape}")
```"""
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
return self.vision_model(
pixel_values=pixel_values,
**kwargs,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)

View File

@ -19,11 +19,11 @@ import torch
import torch.nn as nn
from ...configuration_utils import PreTrainedConfig
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, logging
from ...utils.generic import check_model_inputs
from ...utils import auto_docstring, logging
from ..clip.modeling_clip import (
CLIPMLP,
CLIPAttention,
@ -206,7 +206,7 @@ class MLCDAttention(CLIPAttention):
hidden_states: torch.Tensor,
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
**kwargs: Unpack[FlashAttentionKwargs],
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
batch_size, seq_length = hidden_states.shape[:-1]
@ -258,7 +258,7 @@ class MLCDEncoderLayer(CLIPEncoderLayer):
hidden_states: torch.Tensor,
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
output_attentions: Optional[bool] = False,
) -> tuple[torch.FloatTensor]:
"""
Args:
@ -270,15 +270,18 @@ class MLCDEncoderLayer(CLIPEncoderLayer):
Represents absolute positional embeddings for the query and key in the attention mechanism.
attention_mask (`torch.FloatTensor`):
Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values.
output_attentions (`bool`, *optional*, defaults to `False`):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
"""
residual = hidden_states
hidden_states = self.layer_norm1(hidden_states)
hidden_states, _ = self.self_attn(
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
position_embeddings=position_embeddings,
attention_mask=attention_mask,
**kwargs,
output_attentions=output_attentions,
)
hidden_states = residual + hidden_states
@ -287,7 +290,12 @@ class MLCDEncoderLayer(CLIPEncoderLayer):
hidden_states = self.mlp(hidden_states)
hidden_states = residual + hidden_states
return hidden_states
outputs = (hidden_states,)
if output_attentions:
outputs += (attn_weights,)
return outputs
class MLCDEncoder(CLIPEncoder):
@ -308,7 +316,9 @@ class MLCDEncoder(CLIPEncoder):
inputs_embeds: torch.FloatTensor,
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor] = None,
**kwargs: Unpack[TransformersKwargs],
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, BaseModelOutput]:
r"""
Args:
@ -324,18 +334,107 @@ class MLCDEncoder(CLIPEncoder):
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
encoder_states = () if output_hidden_states else None
all_attentions = () if output_attentions else None
hidden_states = inputs_embeds
for encoder_layer in self.layers:
hidden_states = encoder_layer(
hidden_states,
position_embeddings,
attention_mask,
**kwargs,
for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
layer_outputs = encoder_layer(
hidden_states=hidden_states,
position_embeddings=position_embeddings,
attention_mask=attention_mask,
output_attentions=output_attentions,
)
hidden_states = layer_outputs[0]
if output_attentions:
all_attentions = all_attentions + (layer_outputs[1],)
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
if not return_dict:
return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None)
return BaseModelOutput(
last_hidden_state=hidden_states,
hidden_states=encoder_states,
attentions=all_attentions,
)
class MLCDVisionTransformer(CLIPVisionTransformer):
def __init__(self, config: MLCDVisionConfig):
super().__init__(config)
self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2)
self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2))
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, BaseModelOutputWithPooling]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
num_patches_height = pixel_values.shape[-2] // self.config.patch_size
num_patches_width = pixel_values.shape[-1] // self.config.patch_size
rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width)
rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device)
rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
position_embeddings = (emb.cos(), emb.sin())
hidden_states = self.embeddings(pixel_values)
hidden_states = self.pre_layrnorm(hidden_states)
encoder_outputs = self.encoder(
inputs_embeds=hidden_states,
position_embeddings=position_embeddings,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
last_hidden_state = encoder_outputs[0]
pooled_output = last_hidden_state[:, 0, :]
pooled_output = self.post_layernorm(pooled_output)
if not return_dict:
return (last_hidden_state, pooled_output) + encoder_outputs[1:]
return BaseModelOutputWithPooling(
last_hidden_state=last_hidden_state,
pooler_output=pooled_output,
hidden_states=encoder_outputs.hidden_states,
attentions=encoder_outputs.attentions,
)
@ -344,15 +443,8 @@ class MLCDPreTrainedModel(PreTrainedModel):
config: MLCDVisionConfig
base_model_prefix = "mlcd"
supports_gradient_checkpointing = True
accepts_loss_kwargs = False
_supports_flash_attn = True
_supports_sdpa = True
_supports_flex_attn = True
_supports_attention_backend = True
_can_record_outputs = {
"hidden_states": MLCDEncoderLayer,
"attentions": MLCDAttention,
}
def _init_weights(self, module):
"""Initialize the weights"""
@ -386,55 +478,14 @@ class MLCDPreTrainedModel(PreTrainedModel):
module.bias.data.zero_()
class MLCDVisionTransformer(CLIPVisionTransformer):
def __init__(self, config: MLCDVisionConfig):
super().__init__(config)
self.vision_rotary_embedding = MLCDRotaryEmbedding(config.hidden_size // config.num_attention_heads // 2)
self.class_pos_emb = nn.Parameter(torch.randn(1, config.hidden_size // config.num_attention_heads // 2))
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
**kwargs: Unpack[TransformersKwargs],
) -> Union[tuple, BaseModelOutputWithPooling]:
if pixel_values is None:
raise ValueError("You have to specify pixel_values")
num_patches_height = pixel_values.shape[-2] // self.config.patch_size
num_patches_width = pixel_values.shape[-1] // self.config.patch_size
rotary_pos_emb = self.vision_rotary_embedding(num_patches_height, num_patches_width)
rotary_pos_emb = rotary_pos_emb.to(self.class_pos_emb.device)
rotary_pos_emb = torch.cat([self.class_pos_emb, rotary_pos_emb], dim=0)
emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
position_embeddings = (emb.cos(), emb.sin())
hidden_states = self.embeddings(pixel_values)
hidden_states = self.pre_layrnorm(hidden_states)
encoder_outputs = self.encoder(
inputs_embeds=hidden_states,
position_embeddings=position_embeddings,
**kwargs,
)
last_hidden_state = encoder_outputs[0]
pooled_output = last_hidden_state[:, 0, :]
pooled_output = self.post_layernorm(pooled_output)
return BaseModelOutputWithPooling(
last_hidden_state=last_hidden_state,
pooler_output=pooled_output,
)
class MLCDVisionModel(CLIPVisionModel):
@check_model_inputs(tie_last_hidden_states=False)
@auto_docstring
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
**kwargs: Unpack[TransformersKwargs],
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[tuple, BaseModelOutputWithPooling]:
r"""
Example:
@ -458,9 +509,17 @@ class MLCDVisionModel(CLIPVisionModel):
>>> print(f"Number of attention layers: {len(outputs.attentions)}")
>>> print(f"Attention shape: {outputs.attentions[0].shape}")
```"""
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
return self.vision_model(
pixel_values=pixel_values,
**kwargs,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)

View File

@ -343,12 +343,12 @@ class PLBartEncoder(PLBartPreTrainedModel):
self.max_source_positions = config.max_position_embeddings
embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
self.embed_tokens = PLBartScaledWordEmbedding(
config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale
)
if embed_tokens is not None:
self.embed_tokens = embed_tokens
else:
self.embed_tokens = PLBartScaledWordEmbedding(
config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale
)
self.embed_tokens.weight = embed_tokens.weight
self.embed_positions = PLBartLearnedPositionalEmbedding(
config.max_position_embeddings,
@ -595,12 +595,12 @@ class PLBartDecoder(PLBartPreTrainedModel):
self.max_target_positions = config.max_position_embeddings
embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
self.embed_tokens = PLBartScaledWordEmbedding(
config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale
)
if embed_tokens is not None:
self.embed_tokens = embed_tokens
else:
self.embed_tokens = PLBartScaledWordEmbedding(
config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale
)
self.embed_tokens.weight = embed_tokens.weight
self.embed_positions = PLBartLearnedPositionalEmbedding(
config.max_position_embeddings,
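
The two variants in these hunks either reuse the caller-supplied embedding module or build a fresh one and tie its weight to it. A plain-PyTorch sketch (names below are made up, not PLBart code) of why both give identical lookups:

```python
import torch
import torch.nn as nn

source = nn.Embedding(10, 4, padding_idx=0)

reused = source                        # variant 1: same module object
tied = nn.Embedding(10, 4, padding_idx=0)
tied.weight = source.weight            # variant 2: new module, shared weight Parameter

ids = torch.tensor([[1, 2, 3]])
assert torch.equal(reused(ids), tied(ids))
assert tied.weight is source.weight
```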

View File

@ -1453,6 +1453,8 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2_5_VLPreTrainedModel, GenerationMi
Example:
```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
>>> model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
@ -1462,30 +1464,22 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2_5_VLPreTrainedModel, GenerationMi
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
},
{"type": "text", "text": "Describe the image."},
{"type": "image"},
{"type": "text", "text": "What is shown in this image?"},
],
}
},
]
>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
>>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
>>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos])
>>> # Generate
>>> generated_ids = model.generate(**inputs, max_new_tokens=1024)
>>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
>>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
>>> print(output_text)
```
"""
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (

View File

@ -684,6 +684,8 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
Example:
```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
>>> model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
@ -693,30 +695,22 @@ class Qwen2_5_VLForConditionalGeneration(Qwen2VLForConditionalGeneration):
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
},
{"type": "text", "text": "Describe the image."},
{"type": "image"},
{"type": "text", "text": "What is shown in this image?"},
],
}
},
]
>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
>>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
>>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos])
>>> # Generate
>>> generated_ids = model.generate(**inputs, max_new_tokens=1024)
>>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
>>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
>>> print(output_text)
```
"""
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (

View File

@ -25,7 +25,6 @@ from torch import nn
from ...activations import ACT2FN
from ...cache_utils import Cache
from ...generation import GenerationMixin
from ...masking_utils import create_bidirectional_mask
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import BaseModelOutput, ModelOutput
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
@ -775,19 +774,14 @@ class Qwen2AudioForConditionalGeneration(Qwen2AudioPreTrainedModel, GenerationMi
lengths_expand = audio_feat_lengths.unsqueeze(1).expand(batch_size, max_seq_len)
# Create mask
padding_mask = seq_range >= lengths_expand
audio_attention_mask_2d = (~padding_mask).to(dtype=torch.long, device=audio_feat_lengths.device)
dummy_embeds = torch.zeros(
(batch_size, max_seq_len, 1),
dtype=inputs_embeds.dtype,
device=inputs_embeds.device,
audio_attention_mask_ = padding_mask.view(batch_size, 1, 1, max_seq_len).expand(
batch_size, 1, max_seq_len, max_seq_len
)
audio_attention_mask = create_bidirectional_mask(
config=self.audio_tower.config,
input_embeds=dummy_embeds,
attention_mask=audio_attention_mask_2d,
audio_attention_mask = audio_attention_mask_.to(
dtype=self.audio_tower.conv1.weight.dtype, device=self.audio_tower.conv1.weight.device
)
audio_attention_mask[audio_attention_mask_] = float("-inf")
audio_outputs = self.audio_tower(input_features, attention_mask=audio_attention_mask)
selected_audio_feature = audio_outputs.last_hidden_state
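
A toy-size, pure-PyTorch sketch of the mask construction restored above (the dtype stands in for `self.audio_tower.conv1.weight.dtype`): a `[batch, seq]` padding mask is expanded to `[batch, 1, seq, seq]` and padded positions are filled with `-inf` so they contribute nothing after softmax:

```python
import torch

batch_size, max_seq_len = 2, 4
audio_feat_lengths = torch.tensor([4, 2])               # second sample has 2 padded frames

seq_range = torch.arange(max_seq_len).unsqueeze(0).expand(batch_size, max_seq_len)
lengths_expand = audio_feat_lengths.unsqueeze(1).expand(batch_size, max_seq_len)
padding_mask = seq_range >= lengths_expand               # True where padded

audio_attention_mask_ = padding_mask.view(batch_size, 1, 1, max_seq_len).expand(
    batch_size, 1, max_seq_len, max_seq_len
)
audio_attention_mask = audio_attention_mask_.to(torch.float32)
audio_attention_mask[audio_attention_mask_] = float("-inf")

print(audio_attention_mask[1, 0])  # last two key positions are -inf for every query row
```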

View File

@ -1348,6 +1348,8 @@ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel, GenerationMixin):
Example:
```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
>>> model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
@ -1357,30 +1359,22 @@ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel, GenerationMixin):
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
},
{"type": "text", "text": "Describe the image."},
{"type": "image"},
{"type": "text", "text": "What is shown in this image?"},
],
}
},
]
>>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
>>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
>>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos])
>>> # Generate
>>> generated_ids = model.generate(**inputs, max_new_tokens=1024)
>>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
>>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
>>> print(output_text)
```
"""
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (

View File

@ -1369,42 +1369,8 @@ class Qwen3VLForConditionalGeneration(Qwen3VLPreTrainedModel, GenerationMixin):
The temporal, height and width of feature shape of each video in LLM.
Example:
```python
>>> from transformers import AutoProcessor, Qwen3VLForConditionalGeneration
>>> model = Qwen3VLForConditionalGeneration.from_pretrained("Qwen/Qwen3-VL-8B-Instruct")
>>> processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-8B-Instruct")
>>> messages = [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
},
{"type": "text", "text": "Describe the image."},
],
}
]
>>> inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
>>> # Generate
>>> generated_ids = model.generate(**inputs, max_new_tokens=1024)
>>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
>>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
>>> print(output_text)
```
TODO: Add example
"""
outputs = self.model(
input_ids=input_ids,
pixel_values=pixel_values,

View File

@ -1134,42 +1134,8 @@ class Qwen3VLForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
The temporal, height and width of feature shape of each video in LLM.
Example:
```python
>>> from transformers import AutoProcessor, Qwen3VLForConditionalGeneration
>>> model = Qwen3VLForConditionalGeneration.from_pretrained("Qwen/Qwen3-VL-8B-Instruct")
>>> processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-8B-Instruct")
>>> messages = [
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg",
},
{"type": "text", "text": "Describe the image."},
],
}
]
>>> inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt"
)
>>> # Generate
>>> generated_ids = model.generate(**inputs, max_new_tokens=1024)
>>> generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
>>> output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
>>> print(output_text)
```
TODO: Add example
"""
outputs = self.model(
input_ids=input_ids,
pixel_values=pixel_values,
@ -1350,6 +1316,7 @@ class Qwen3VLProcessor(Qwen2VLProcessor):
video_metadata = videos_inputs.pop("video_metadata")
else:
video_metadata = videos_inputs["video_metadata"]
video_grid_thw = videos_inputs["video_grid_thw"]
else:
videos_inputs = {}
video_grid_thw = None

View File

@ -157,6 +157,7 @@ class Qwen3VLProcessor(ProcessorMixin):
video_metadata = videos_inputs.pop("video_metadata")
else:
video_metadata = videos_inputs["video_metadata"]
video_grid_thw = videos_inputs["video_grid_thw"]
else:
videos_inputs = {}
video_grid_thw = None

View File

@ -206,7 +206,7 @@ class VoxtralProcessor(ProcessorMixin):
tokenizer_kwargs = {**processed_kwargs["template_kwargs"], **text_kwargs}
tokenizer_kwargs["return_tensors"] = None # let's not return tensors here
tokenize = tokenizer_kwargs.pop("tokenize", False)
return_dict = tokenizer_kwargs.pop("return_dict", True)
return_dict = tokenizer_kwargs.pop("return_dict", False)
encoded_instruct_inputs = self.tokenizer.apply_chat_template(
conversations,

View File

@ -1603,7 +1603,7 @@ class ProcessorMixin(PushToHubMixin):
conversations = [conversation]
tokenize = processed_kwargs["template_kwargs"].pop("tokenize", False)
return_dict = processed_kwargs["template_kwargs"].pop("return_dict", True)
return_dict = processed_kwargs["template_kwargs"].pop("return_dict", False)
mm_load_kwargs = processed_kwargs["mm_load_kwargs"]
if tokenize:

View File

@ -383,10 +383,6 @@ class Mxfp4HfQuantizer(HfQuantizer):
state_dict = model.state_dict()
# Get num_local_experts from model config
num_local_experts = getattr(model.config, "num_local_experts", 32)
hidden_size = getattr(model.config, "hidden_size", 2880)
for name, module in model.named_modules():
if (
isinstance(module, Mxfp4GptOssExperts)
@ -396,7 +392,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
state_dict[f"{name}.gate_up_proj_blocks"] = (
module.gate_up_proj.storage.layout.unswizzle_data(module.gate_up_proj.storage.data)
.transpose(-1, -2)
.reshape(num_local_experts, -1, 90, 16)
.reshape(32, -1, 90, 16)
)
state_dict[f"{name}.gate_up_proj_scales"] = (
module.gate_up_proj_precision_config.weight_scale.storage.layout.unswizzle_data(
@ -406,7 +402,7 @@ class Mxfp4HfQuantizer(HfQuantizer):
state_dict[f"{name}.down_proj_blocks"] = (
module.down_proj.storage.layout.unswizzle_data(module.down_proj.storage.data)
.transpose(-1, -2)
.reshape(num_local_experts, hidden_size, 90, -1)
.reshape(32, 2880, 90, -1)
)
state_dict[f"{name}.down_proj_scales"] = (
module.down_proj_precision_config.weight_scale.storage.layout.unswizzle_data(

View File

@ -1378,7 +1378,7 @@ class MistralCommonTokenizer(PushToHubMixin):
truncation: bool = False,
max_length: Optional[int] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
return_dict: bool = True,
return_dict: bool = False,
**kwargs,
) -> Union[str, list[int], list[str], list[list[int]], BatchEncoding]:
"""

View File

@ -18,8 +18,6 @@ fronting encoding methods) Special token mixing (host the special tokens logic)
of output with special method for the Fast tokenizers)
"""
from __future__ import annotations
import copy
import json
import os
@ -785,7 +783,7 @@ class BatchEncoding(UserDict):
return self
def to(self, device: Union[str, torch.device], *, non_blocking: bool = False) -> BatchEncoding:
def to(self, device: Union[str, "torch.device"], *, non_blocking: bool = False) -> "BatchEncoding":
"""
Send all values to device by calling `v.to(device, non_blocking=non_blocking)` (PyTorch only).
@ -1588,7 +1586,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
truncation: bool = False,
max_length: Optional[int] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
return_dict: bool = True,
return_dict: bool = False,
return_assistant_tokens_mask: bool = False,
tokenizer_kwargs: Optional[dict[str, Any]] = None,
**kwargs,
@ -1661,11 +1659,14 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
set, will return a dict of tokenizer outputs instead.
"""
if not tokenize:
return_dict = False # dicts are only returned by the tokenizer anyway
if return_dict and not tokenize:
raise ValueError(
"`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
"of tokenizer outputs to return."
)
if return_assistant_tokens_mask and not (return_dict and tokenize):
raise ValueError("`return_assistant_tokens_mask=True` requires `return_dict=True` and `tokenize=True`")
if return_assistant_tokens_mask and not return_dict:
raise ValueError("`return_assistant_tokens_mask=True` is incompatible with `return_dict=False`")
if tokenizer_kwargs is None:
tokenizer_kwargs = {}
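
For context on the `return_dict` default flipped in this hunk, a hedged usage sketch; the checkpoint name is only an example of a tokenizer that ships a chat template, and passing the flag explicitly behaves the same on either side of this change:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # example checkpoint
chat = [{"role": "user", "content": "Hello!"}]

ids = tok.apply_chat_template(chat, tokenize=True, return_dict=False)
enc = tok.apply_chat_template(chat, tokenize=True, return_dict=True)

print(type(ids), type(enc))      # plain list of token ids vs. BatchEncoding
print(ids == enc["input_ids"])   # the token ids themselves are the same
```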
@ -1780,17 +1781,13 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
)
if conversation_history is None or len(conversation_history) == 0:
return self.apply_chat_template(
[message], add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
)
return self.apply_chat_template([message], add_generation_prompt=False, tokenize=True, **kwargs)
conversation = conversation_history + [message]
tokens = self.apply_chat_template(
conversation, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
)
tokens = self.apply_chat_template(conversation, add_generation_prompt=False, tokenize=True, **kwargs)
prefix_tokens = self.apply_chat_template(
conversation_history, add_generation_prompt=False, tokenize=True, return_dict=False, **kwargs
conversation_history, add_generation_prompt=False, tokenize=True, **kwargs
)
# It's possible that the prefix tokens are not a prefix of the full list of tokens.
# For example, if the prefix is `<s>User: Hi` and the full conversation is `<s>User: Hi</s><s>Assistant: Hello`.
@ -1861,11 +1858,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
return chat_template
def parse_response(
self,
response: str | list[str | int | list[int]] | np.ndarray | torch.Tensor,
schema: list | dict | None = None,
):
def parse_response(self, response: str, schema: Optional[Union[list, dict]] = None):
"""
Converts an output string created by generating text from a model into a parsed message dictionary.
This method is intended for use with chat models, and will read the tokenizer's `response_schema` attribute to
@ -1876,29 +1869,16 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
Args:
response (`str`):
The output string generated by the model. This can be either a decoded string or list of strings,
or token IDs as a list/array.
The output string generated by the model. This should be the decoded string, not raw tokens.
schema (`Union[list, dict]`, *optional*):
A response schema that indicates the expected output format and how parsing should be performed.
If not provided, the tokenizer's `response_schema` attribute will be used.
"""
batched = (
(isinstance(response, list) and not isinstance(response[0], int))
or getattr(response, "ndim", 0) > 1 # For torch/numpy tensors
)
if schema is None:
if getattr(self, "response_schema", None) is None:
raise AttributeError("This tokenizer does not have a `response_schema` for parsing chat responses!")
schema = self.response_schema
if batched:
if not (isinstance(response, list) and isinstance(response[0], str)):
response = self.batch_decode(response)
return [recursive_parse(single_response, schema) for single_response in response]
else:
if not isinstance(response, str):
response = self.decode(response)
return recursive_parse(response, schema)
return recursive_parse(response, schema)
@classmethod
def from_pretrained(
@ -3883,7 +3863,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
def batch_decode(
self,
sequences: Union[list[int], list[list[int]], np.ndarray, torch.Tensor],
sequences: Union[list[int], list[list[int]], np.ndarray, "torch.Tensor"],
skip_special_tokens: bool = False,
clean_up_tokenization_spaces: Optional[bool] = None,
**kwargs,
@ -3917,7 +3897,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
def decode(
self,
token_ids: Union[int, list[int], np.ndarray, torch.Tensor],
token_ids: Union[int, list[int], np.ndarray, "torch.Tensor"],
skip_special_tokens: bool = False,
clean_up_tokenization_spaces: Optional[bool] = None,
**kwargs,

View File

@ -173,12 +173,12 @@ def recursive_parse(
return parsed_schema
elif isinstance(node_content, dict):
for key, child_node in node_schema.get("properties", {}).items():
if "const" in child_node:
parsed_schema[key] = child_node["const"]
elif key in node_content:
if key in node_content:
parsed_schema[key] = recursive_parse(node_content[key], child_node)
elif "default" in child_node:
parsed_schema[key] = child_node["default"]
else:
pass
if "additionalProperties" in node_schema:
for key, value in node_content.items():
if key not in node_schema.get("properties", {}):
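
A standalone sketch of the property-filling logic touched in this hunk (not the real `recursive_parse`; the schema and content below are made up): `const` entries were filled unconditionally, `default` entries only when the key is absent from the parsed content:

```python
node_content = {"name": "get_weather"}
node_schema = {
    "properties": {
        "role": {"const": "tool_call"},   # branch removed by this diff
        "name": {},
        "arguments": {"default": {}},
    }
}

parsed_schema = {}
for key, child_node in node_schema.get("properties", {}).items():
    if "const" in child_node:
        parsed_schema[key] = child_node["const"]
    elif key in node_content:
        parsed_schema[key] = node_content[key]   # the real code recurses into the child schema here
    elif "default" in child_node:
        parsed_schema[key] = child_node["default"]

print(parsed_schema)  # {'role': 'tool_call', 'name': 'get_weather', 'arguments': {}}
```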

View File

@ -47,7 +47,8 @@ PACKAGE_DISTRIBUTION_MAPPING = importlib.metadata.packages_distributions()
def _is_package_available(pkg_name: str, return_version: bool = False) -> tuple[bool, str] | bool:
"""Check if `pkg_name` exist, and optionally try to get its version"""
spec = importlib.util.find_spec(pkg_name)
package_exists = spec is not None
# the spec might be not None but not importable
package_exists = spec is not None and spec.loader is not None
package_version = "N/A"
if package_exists and return_version:
try:
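
A quick look at the two attributes distinguished by the guard removed here (per the inline comment, a spec can exist while the package is not really importable as a regular module); the stdlib module below is just an example:

```python
import importlib.util

spec = importlib.util.find_spec("json")            # a regular, importable package
print(spec is not None, spec.loader is not None)   # True True

# A leftover or namespace-style package directory can, on some Python versions, yield a
# spec whose loader is None, which is what the stricter check was meant to filter out.
```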

View File

@ -520,6 +520,7 @@ class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
quantization_config=BitsAndBytesConfig(load_in_4bit=True, llm_int8_skip_modules=["multihead_attn"]),
)
processor = AutoProcessor.from_pretrained(model_id)
assert model.device.type == "cuda", "This test is only supported on CUDA" # TODO: remove this
# Prepare inputs with no images
inputs = processor(text="Hello, I am", return_tensors="pt").to(torch_device)

View File

@ -267,7 +267,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
EXPECTED_LOGITS = Expectations(
{
("xpu", 3): [1.6699, 0.6260, 3.2266, 8.5547, 2.209],
("xpu", 3): [0.4109, 0.1532, 0.8018, 2.1328, 0.5483],
# 4-bit
("cuda", 7): [0.1097, 0.3481, 3.8340, 9.7969, 2.0488],
("cuda", 8): [1.6396, 0.6094, 3.1992, 8.5234, 2.1875],
@ -308,7 +308,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
expected_outputs = Expectations(
{
("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit sky,\nNature's quiet song.",
("xpu", 3): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
# 4-bit
("cuda", 7): "Sure, here's a haiku for you:\n\nMorning dew sparkles,\nPetals unfold in sunlight,\n",
("cuda", 8): "Whispers on the breeze,\nLeaves dance under moonlit skies,\nNature's quiet song.",
@ -474,7 +474,7 @@ class AyaVisionIntegrationTest(unittest.TestCase):
# Batching seems to alter the output slightly, but it is also the case in the original implementation. This seems to be expected: https://github.com/huggingface/transformers/issues/23017#issuecomment-1649630232
expected_outputs = Expectations(
{
("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.",
("xpu", 3): "Wooden path to water,\nMountains echo in stillness,\nPeaceful forest lake.",
("cuda", 7): 'Wooden bridge stretches\nMirrored lake below, mountains rise\nPeaceful, serene',
("cuda", 8): 'Wooden path to water,\nMountains echo in stillness,\nPeaceful forest scene.',
}

View File

@ -16,7 +16,6 @@
import copy
import tempfile
import unittest
import unittest.mock
from functools import cached_property
import timeout_decorator # noqa
@ -478,23 +477,6 @@ class BartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
with torch.no_grad():
model(**inputs)[0]
def test_input_embeddings_support_forward_hook(self):
# Make sure that registering hooks on the input embeddings are indeed called
# in forward. This is necessary for gradient checkpointing in PEFT, see also #41821.
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
for model_class in self.all_model_classes:
model = model_class(config)
model.to(torch_device)
model.eval()
hook = unittest.mock.MagicMock(return_value=None)
model.get_input_embeddings().register_forward_hook(hook)
inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
model(**inputs)
self.assertGreater(hook.call_count, 0)
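
The test removed above checked that a forward hook registered on the input embeddings fires during `forward`; a generic PyTorch reduction of the same check, independent of any transformers model:

```python
import unittest.mock

import torch
import torch.nn as nn

embed = nn.Embedding(10, 4)
hook = unittest.mock.MagicMock(return_value=None)
embed.register_forward_hook(hook)

embed(torch.tensor([[1, 2, 3]]))
assert hook.call_count > 0   # the hook ran because the embedding module itself was called
```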
@require_torch_fp16
def test_generate_fp16(self):
config, input_dict = self.model_tester.prepare_config_and_inputs()

View File

@ -18,6 +18,7 @@ import unittest
from functools import cached_property
from transformers import BlenderbotTokenizer, BlenderbotTokenizerFast
from transformers.testing_utils import require_jinja
class Blenderbot3BTokenizerTests(unittest.TestCase):
@ -50,3 +51,24 @@ class Blenderbot3BTokenizerTests(unittest.TestCase):
def test_3B_tokenization_same_as_parlai_rust_tokenizer(self):
assert self.rust_tokenizer_3b.add_prefix_space
assert self.rust_tokenizer_3b([" Sam", "Sam"]).input_ids == [[5502, 2], [5502, 2]]
@require_jinja
def test_tokenization_for_chat(self):
tok = self.tokenizer_3b
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
]
tokenized_chats = [tok.apply_chat_template(test_chat) for test_chat in test_chats]
expected_tokens = [
[553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 2],
[553, 366, 265, 4792, 3879, 73, 311, 21, 228, 228, 6950, 8, 228, 3490, 287, 2273, 304, 21, 2],
[3490, 287, 2273, 304, 21, 228, 228, 6950, 8, 2],
]
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)

View File

@ -18,7 +18,7 @@ import unittest
from datasets import load_dataset
from transformers import BloomTokenizerFast
from transformers.testing_utils import require_tokenizers
from transformers.testing_utils import require_jinja, require_tokenizers
from ...test_tokenization_common import TokenizerTesterMixin
@ -137,6 +137,28 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
predicted_text = [tokenizer.decode(x, clean_up_tokenization_spaces=False) for x in output_tokens]
self.assertListEqual(predicted_text, input_text)
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = self.get_rust_tokenizer()
tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
]
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
expected_tokens = [
[5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2],
[5448, 1306, 267, 66799, 44799, 37143, 17, 2, 59414, 4, 2, 229126, 427, 11890, 1152, 17, 2],
[229126, 427, 11890, 1152, 17, 2, 59414, 4, 2],
]
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)
def test_add_prefix_space_fast(self):
tokenizer_w_prefix = self.get_rust_tokenizer(add_prefix_space=True)
tokenizer_wo_prefix = self.get_rust_tokenizer(add_prefix_space=False)

View File

@ -146,6 +146,32 @@ class CohereTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertGreaterEqual(len(self.tokenizer_class.pretrained_vocab_files_map), 1)
self.assertGreaterEqual(len(list(self.tokenizer_class.pretrained_vocab_files_map.values())[0]), 1)
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = self.get_rust_tokenizer()
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
]
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
# fmt: off
expected_tokens = [
[5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65, 59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59, 45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8],
[5, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 59, 65,
59, 60, 45, 53, 71, 60, 55, 51, 45, 54, 99, 38, 65, 243, 394, 204, 336, 84, 88, 887, 374, 216, 74, 286, 22, 8,
36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61, 58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 61, 59,
45, 58, 71, 60, 55, 51, 45, 54, 99, 38, 48, 420, 87, 9, 8, 36, 99, 59, 60, 41, 58, 60, 71, 55, 46, 71, 60, 61,
58, 54, 71, 60, 55, 51, 45, 54, 99, 38, 36, 99, 43, 48, 41, 60, 42, 55, 60, 71, 60, 55, 51, 45, 54, 99, 38,
54, 567, 235, 693, 276, 411, 243, 22, 8]
]
# fmt: on
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)
@require_jinja
def test_tokenization_for_tool_use(self):
tokenizer = self.get_rust_tokenizer()

View File

@ -27,6 +27,7 @@ from transformers.convert_slow_tokenizer import convert_slow_tokenizer
from transformers.testing_utils import (
get_tests_dir,
nested_simplify,
require_jinja,
require_read_token,
require_sentencepiece,
require_tokenizers,
@ -427,6 +428,25 @@ class GemmaIntegrationTest(unittest.TestCase):
# a dummy prefix space is not added by the sp_model as it was de-activated
self.assertEqual(tokens, tokenizer.sp_model.encode("▁▁", out_type=str))
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = GemmaTokenizer.from_pretrained("hf-internal-testing/dummy-gemma")
test_chats = [
[{"role": "user", "content": "Hello!"}],
[
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "user", "content": "Hello!"}],
]
# Matt: The third test case tests the default system message, but if this is ever changed in the
# class/repo code then that test will fail, and the case will need to be updated.
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
expected_tokens = [[235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108], [235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108, 235322, 235371, 571, 235298, 2997, 73786, 105776, 108, 7731, 577, 4664, 692, 35606, 235371, 571, 235298, 615, 73786, 108], [235322, 235371, 571, 235298, 2997, 73786, 1645, 108, 4521, 149907, 235371, 571, 235298, 615, 73786, 108]] # fmt: skip
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)
def test_save_fast_load_slow(self):
# Ensure that we can save a fast tokenizer and load it as a slow tokenizer
slow_tokenizer = self.tokenizer

View File

@ -499,7 +499,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
EXPECTED_TEXTS = Expectations(
{
("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with turquoise water and a blue sky in the background. It looks like a'],
("xpu", 3): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown and white cow standing on a sandy beach with turquoise water in the background. It looks like a lovely,'],
("cuda", (8, 0)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear turquoise water and a blue sky in the background. It looks like'],
("cuda", (8, 6)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with clear blue water and a blue sky in the background. It looks like'],
("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nCertainly! \n\nThe image shows a brown cow standing on a sandy beach with turquoise water and a blue sky in the background. It looks like a'],
@ -610,7 +610,7 @@ class Gemma3IntegrationTest(unittest.TestCase):
EXPECTED_NUM_IMAGES = 3 # one for the origin image and two crops of images
EXPECTED_TEXTS = Expectations(
{
("xpu", 3): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a bright blue sky with some white clouds in the"],
("xpu", 3): ['user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There are clouds in the blue sky above.'],
("cuda", 7): [],
("cuda", (8, 6)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a clear blue sky with some white clouds above."],
("cuda", (8, 0)): ["user\nYou are a helpful assistant.\n\nHere is the original image \n\n\n\n and here are some crops to help you see better \n\n\n\n \n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown cow standing on a sandy beach next to a turquoise ocean. There's a blue sky with some white clouds in the background"],

View File

@ -24,9 +24,7 @@ from transformers import (
is_torch_available,
)
from transformers.testing_utils import (
Expectations,
cleanup,
require_deterministic_for_xpu,
require_flash_attn,
require_torch,
require_torch_gpu,
@ -415,7 +413,6 @@ class Glm4vIntegrationTest(unittest.TestCase):
)
@slow
@require_deterministic_for_xpu
def test_small_model_integration_test_expand(self):
model = Glm4vForConditionalGeneration.from_pretrained(
"THUDM/GLM-4.1V-9B-Thinking", dtype="auto", device_map="auto"
@ -429,23 +426,14 @@ class Glm4vIntegrationTest(unittest.TestCase):
output = model.generate(**inputs, max_new_tokens=30, do_sample=False, num_beams=2, num_return_sequences=2)
# fmt: off
EXPECTED_DECODED_TEXTS = Expectations(
{
(None, None): ["\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat. Specifically",
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat, specifically"
],
("xpu", None): ["\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat. Specifically, it looks",
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture is not a dog; it's a cat, specifically a Pallas"
],
}
EXPECTED_DECODED_TEXT = [
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat. Specifically",
"\nWhat kind of dog is this?\n<think>Got it, let's look at the image. The animal in the picture doesn't look like a dog; it's actually a cat, specifically"
] # fmt: skip
self.assertEqual(
self.processor.batch_decode(output, skip_special_tokens=True),
EXPECTED_DECODED_TEXT,
)
# fmt: on
EXPECTED_DECODED_TEXT = EXPECTED_DECODED_TEXTS.get_expectation()
decoded_text = self.processor.batch_decode(output, skip_special_tokens=True)
self.assertEqual(decoded_text, EXPECTED_DECODED_TEXT)
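Several files in this diff either adopt or drop the Expectations helper from transformers.testing_utils, as this hunk does. Roughly, it maps (device_type, version) keys to expected values and get_expectation() picks the entry matching the current accelerator, with (None, None) acting as the catch-all default; a schematic sketch with made-up values:
from transformers.testing_utils import Expectations
EXPECTED_TEXTS = Expectations(
    {
        (None, None): "generic expected output",  # fallback entry
        ("cuda", 8): "output observed on CUDA capability 8.x",
        ("xpu", 3): "output observed on XPU",
    }
)
EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation()  # resolves against the running device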
@slow
def test_small_model_integration_test_batch_wo_image(self):

View File

@ -19,7 +19,7 @@ import unittest
from transformers import AutoTokenizer, GPT2Tokenizer, GPT2TokenizerFast
from transformers.models.gpt2.tokenization_gpt2 import VOCAB_FILES_NAMES
from transformers.testing_utils import require_tiktoken, require_tokenizers
from transformers.testing_utils import require_jinja, require_tiktoken, require_tokenizers
from ...test_tokenization_common import TokenizerTesterMixin
@ -281,6 +281,28 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
filtered_sequence = [x for x in filtered_sequence if x is not None]
self.assertEqual(encoded_sequence, filtered_sequence)
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = GPT2Tokenizer.from_pretrained(self.tmpdirname)
tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
]
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
# fmt: off
expected_tokens = [[20, 1, 20, 10, 20, 4, 3, 10, 20, 10, 20, 3, 0, 20, 20, 20, 0, 10, 20, 20, 20, 6, 20, 1, 6, 20, 20, 20, 3, 0, 0, 1, 20, 20],
[20, 1, 20, 10, 20, 4, 3, 10, 20, 10, 20, 3, 0, 20, 20, 20, 0, 10, 20, 20, 20, 6, 20, 1, 6, 20, 20, 20, 3, 0, 0, 1, 20, 20, 20, 7, 20, 3, 10, 6, 1, 10, 20, 3, 3, 6, 10, 20, 1, 20, 20, 20],
[20, 7, 20, 3, 10, 6, 1, 10, 20, 3, 3, 6, 10, 20, 1, 20, 20, 20, 20, 3, 0, 0, 1, 20, 20]]
# fmt: on
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)
@require_tiktoken
def test_tokenization_tiktoken(self):
from tiktoken import encoding_name_for_model

View File

@ -15,7 +15,7 @@
import unittest
from transformers import GPTSw3Tokenizer
from transformers.testing_utils import get_tests_dir, require_sentencepiece, require_tokenizers, slow
from transformers.testing_utils import get_tests_dir, require_jinja, require_sentencepiece, require_tokenizers, slow
from ...test_tokenization_common import TokenizerTesterMixin
@ -127,3 +127,36 @@ class GPTSw3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
model_name="AI-Sweden-Models/gpt-sw3-126m",
sequences=sequences,
)
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = GPTSw3Tokenizer(SAMPLE_VOCAB)
tokenizer.chat_template = (
"{{ eos_token }}{{ bos_token }}"
"{% for message in messages %}"
"{% if message['role'] == 'user' %}{{ 'User: ' + message['content']}}"
"{% else %}{{ 'Bot: ' + message['content']}}{% endif %}"
"{{ message['text'] }}{{ bos_token }}"
"{% endfor %}"
"Bot:"
)
# This is in English, but it's just here to make sure the chat control tokens are being added properly
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "assistant", "content": "Nice to meet you."}, {"role": "user", "content": "Hello!"}],
]
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
# fmt: off
expected_tokens = [
[2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419],
[2000, 1, 575, 541, 419, 530, 339, 265, 878, 708, 727, 275, 347, 541, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 575, 541, 419],
[2000, 1, 575, 541, 419, 984, 429, 281, 264, 1261, 291, 260, 1, 968, 263, 314, 419, 366, 354, 294, 360, 1, 575, 541, 419]
]
# fmt: on
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)
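The template defined in this test branches on the message role before appending the bos token. A simplified illustration of how such a template renders, using plain jinja2 rather than the tokenizer's sandboxed environment, dropping the message['text'] term, and substituting placeholder special-token strings:
from jinja2 import Template
chat_template = (
    "{{ eos_token }}{{ bos_token }}"
    "{% for message in messages %}"
    "{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}"
    "{% else %}{{ 'Bot: ' + message['content'] }}{% endif %}"
    "{{ bos_token }}"
    "{% endfor %}"
    "Bot:"
)
rendered = Template(chat_template).render(
    messages=[{"role": "user", "content": "Hello!"}],
    bos_token="<bos>",  # placeholder strings; the real test uses the sample vocab's tokens
    eos_token="<eos>",
)
assert rendered == "<eos><bos>User: Hello!<bos>Bot:"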

View File

@ -682,7 +682,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
expectations = Expectations(
{
(None, None): [0.4526, 0.4082],
(None, None): [[0.4526, 0.4082]],
("cuda", 8): [0.4524, 0.4074],
}
)

View File

@ -227,7 +227,6 @@ class InternVLQwen2IntegrationTest(unittest.TestCase):
def tearDown(self):
cleanup(torch_device, gc_collect=True)
@require_deterministic_for_xpu
def test_qwen2_small_model_integration_generate(self):
processor = AutoProcessor.from_pretrained(self.small_model_checkpoint)
model = InternVLForConditionalGeneration.from_pretrained(
@ -245,16 +244,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase):
decoded_output = processor.decode(
generate_ids[0, inputs["input_ids"].shape[1] :], skip_special_tokens=True
)
# fmt: off
expected_outputs = Expectations(
{
(None, None): "The image shows two cats lying on a pink surface, which appears to be a bed or couch.",
("xpu", 3): "The image shows two cats lying on a pink blanket. The cat on the left is a tabby",
}
)
# fmt: on
expected_output = expected_outputs.get_expectation()
expected_output = "The image shows two cats lying on a pink surface, which appears to be a bed or couch."
self.assertEqual(decoded_output, expected_output)
@ -278,9 +268,9 @@ class InternVLQwen2IntegrationTest(unittest.TestCase):
actual_logits = output.logits[0, -1, :5].cpu()
expected_logits_all = Expectations(
{
("xpu", 3): torch.tensor([11.9922, 14.7188, 14.3125, 10.6719, 6.9297], dtype=torch.float16),
("cuda", 7): torch.tensor([11.9531, 14.7031, 14.2734, 10.6562, 6.9219], dtype=torch.float16),
("cuda", 8): torch.tensor([11.9609, 14.7188, 14.2734, 10.6484, 6.9141], dtype=torch.float16),
("xpu", 3): torch.tensor([11.7500, 14.7500, 14.1250, 10.5625, 6.7812], dtype=torch.float16),
("cuda", 7): torch.tensor([11.9531, 14.7031, 14.2734, 10.6562, 6.9219], dtype=torch.float16),
("cuda", 8): torch.tensor([11.9609, 14.7188, 14.2734, 10.6484, 6.9141], dtype=torch.float16),
}
) # fmt: skip
expected_logits = expected_logits_all.get_expectation()
@ -308,7 +298,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase):
expected_outputs = Expectations(
{
("xpu", 3): "Whispers of dawn,\nSilent whispers of night,\nPeace in the stillness.",
("xpu", 3): "Whispers of dawn,\nSilent whispers of the night,\nNew day's light.",
("cuda", 7): 'Whispers of dawn,\nSilent whispers of night,\nPeace in the stillness.',
("cuda", 8): 'Whispers of dawn,\nSilent whispers of night,\nPeace in the stillness.',
}
@ -580,7 +570,7 @@ class InternVLQwen2IntegrationTest(unittest.TestCase):
decoded_output = processor.decode(output[1], skip_special_tokens=True)
expected_outputs = Expectations(
{
("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot",
("xpu", 3): "user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nThe man is performing a forehand shot.",
("cuda", 7): 'user\nFrame1: \nFrame2: \nFrame3: \nFrame4: \nFrame5: \nFrame6: \nFrame7: \nFrame8: \nWhat type of shot is the man performing?\nassistant\nA forehand shot',
}
) # fmt: skip

View File

@ -18,9 +18,7 @@ import unittest
from transformers import AutoTokenizer, is_torch_available, set_seed
from transformers.testing_utils import (
Expectations,
cleanup,
require_deterministic_for_xpu,
require_read_token,
require_torch,
require_torch_accelerator,
@ -172,30 +170,36 @@ class Lfm2MoeIntegrationTest(unittest.TestCase):
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
with torch.no_grad():
out = model(input_ids).logits.float().cpu()
# fmt: off
# Expected mean on dim = -1
EXPECTED_MEANS = Expectations(
{
("cuda", None): torch.tensor([[-1.3855, -0.5123, -1.3143, -1.2144, -1.0791, -1.2117, -1.4704, -0.7648, -0.6175, -1.2402, -1.1459, -1.0083, -1.0247, -0.8830, -1.5643, -1.7266, -1.6254,]]),
("xpu", None): torch.tensor([[-1.3863, -0.4653, -1.3246, -1.3199, -1.0940, -1.2254, -1.4716, -0.8852, -0.5920, -1.2182, -1.1782, -1.0268, -1.0114, -0.8816, -1.5774, -1.7408, -1.6147,]]),
}
EXPECTED_MEAN = torch.tensor(
[
[
-1.3855,
-0.5123,
-1.3143,
-1.2144,
-1.0791,
-1.2117,
-1.4704,
-0.7648,
-0.6175,
-1.2402,
-1.1459,
-1.0083,
-1.0247,
-0.8830,
-1.5643,
-1.7266,
-1.6254,
]
]
)
# fmt: on
EXPECTED_MEAN = EXPECTED_MEANS.get_expectation()
out_mean = out.mean(-1)
torch.testing.assert_close(out_mean, EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
# fmt: off
torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, rtol=1e-2, atol=1e-2)
# Expected portion of the logits
EXPECTED_SLICES = Expectations(
{
("cuda", None): torch.tensor([-1.2656, 2.4844, 5.5000, -1.3359, -1.3203, -1.3438, 1.9375, 5.8438, -0.6523, -1.2891]),
("xpu", None): torch.tensor([-1.2656, 2.4531, 5.4375, -1.3438, -1.3203, -1.3516, 1.9297, 5.7812, -0.6719, -1.3203]),
}
EXPECTED_SLICE = torch.tensor(
[-1.2656, 2.4844, 5.5000, -1.3359, -1.3203, -1.3438, 1.9375, 5.8438, -0.6523, -1.2891]
)
# fmt: on
EXPECTED_SLICE = EXPECTED_SLICES.get_expectation()
out_slice = out[0, 0, :10]
torch.testing.assert_close(out_slice, EXPECTED_SLICE, rtol=1e-4, atol=1e-4)
torch.testing.assert_close(out[0, 0, :10], EXPECTED_SLICE, rtol=1e-4, atol=1e-4)
@slow
def test_model_1a8b_generation(self):
@ -213,25 +217,13 @@ class Lfm2MoeIntegrationTest(unittest.TestCase):
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
@slow
@require_deterministic_for_xpu
def test_model_1a8b_batched_chat_generation(self):
prompts = ["Who are you?", "Complete the text: Lorem ipsum dolor ", "The Meji Restoration in Japan ended"]
# fmt: off
EXPECTED_TEXT_COMPLETIONS = Expectations(
{
("cuda", None): ["Who are you?, a language model designed to assist with information and tasks? \nI am",
"Complete the text: Lorem ipsum dolor ipsum dolor ipsum dolor ipsum dolor ipsum dolor",
"The Meji Restoration in Japan ended or the Meiji Restoration (18681912) marked a pivotal",
],
("xpu", None): ['Who are you? (AI) designed to assist? \nI am an AI assistant developed to',
'Complete the text: Lorem ipsum dolor ipsum dolor ipsum dolor ipsum dolor ipsum dolor',
'The Meji Restoration in Japan ended** \n* **Key Event:** The overthrow of the Tokugawa'
],
}
)
# fmt: on
EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation()
EXPECTED_TEXT_COMPLETIONS = [
"Who are you?, a language model designed to assist with information and tasks? \nI am",
"Complete the text: Lorem ipsum dolor ipsum dolor ipsum dolor ipsum dolor ipsum dolor",
"The Meji Restoration in Japan ended or the Meiji Restoration (18681912) marked a pivotal",
]
set_seed(1789)
tokenizer = AutoTokenizer.from_pretrained("LiquidAI/LFM2-8B-A1B", use_fast=False)
model = self.get_model()
@ -241,4 +233,4 @@ class Lfm2MoeIntegrationTest(unittest.TestCase):
with torch.no_grad():
generated_ids = model.generate(**batched_input_ids, max_new_tokens=15, do_sample=False)
text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
self.assertEqual(EXPECTED_TEXT_COMPLETIONS, text)

View File

@ -32,6 +32,7 @@ from transformers.convert_slow_tokenizer import convert_slow_tokenizer
from transformers.testing_utils import (
get_tests_dir,
nested_simplify,
require_jinja,
require_read_token,
require_sentencepiece,
require_tiktoken,
@ -701,6 +702,32 @@ class LlamaIntegrationTest(unittest.TestCase):
with self.assertRaises(ValueError):
tokenizer = LlamaTokenizerFast(SAMPLE_VOCAB, eos_token=None, add_bos_token=True, add_eos_token=True)
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
{"role": "system", "content": "You are a helpful chatbot."},
{"role": "user", "content": "Hello!"},
{"role": "assistant", "content": "Nice to meet you."},
],
[{"role": "user", "content": "Hello!"}],
]
# Matt: The third test case tests the default system message, but if this is ever changed in the
# class/repo code then that test will fail, and the case will need to be updated.
tokenized_chats = [tokenizer.apply_chat_template(test_chat) for test_chat in test_chats]
# fmt: off
expected_tokens = [
[1, 29961, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 13563, 7451, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10994, 29991, 518, 29914, 25580, 29962],
[1, 29961, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 13563, 7451, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10994, 29991, 518, 29914, 25580, 29962, 20103, 304, 5870, 366, 29889, 29871, 2],
[1, 29961, 25580, 29962, 15043, 29991, 518, 29914, 25580, 29962]
]
# fmt: on
for tokenized_chat, expected_tokens in zip(tokenized_chats, expected_tokens):
self.assertListEqual(tokenized_chat, expected_tokens)
@require_sentencepiece
@require_tokenizers

View File

@ -152,11 +152,10 @@ class LlavaVisionText2TextModelTester:
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
config, pixel_values = config_and_inputs
input_ids = ids_tensor([self.batch_size, self.seq_length], config.text_config.vocab_size - 2) + 2
attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(torch_device)
input_ids = ids_tensor([self.batch_size, self.seq_length], config.text_config.vocab_size - 1) + 1
input_ids[input_ids == config.image_token_index] = self.pad_token_id
input_ids[:, : self.num_image_tokens] = config.image_token_index
attention_mask = input_ids.ne(1).to(torch_device)
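The tester now derives the attention mask from the padding id instead of an all-ones tensor. A standalone sketch of the same construction with made-up dimensions and token ids:
import torch
batch_size, seq_length, vocab_size = 2, 7, 99  # toy values standing in for the tester's config
image_token_index, pad_token_id, num_image_tokens = 32, 1, 3
input_ids = torch.randint(1, vocab_size, (batch_size, seq_length))  # ids drawn from [1, vocab_size)
input_ids[input_ids == image_token_index] = pad_token_id            # scrub accidental image tokens
input_ids[:, :num_image_tokens] = image_token_index                 # place the expected image tokens up front
attention_mask = input_ids.ne(pad_token_id).long()                  # attend everywhere except the pad id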
inputs_dict = {
"pixel_values": pixel_values,

View File

@ -275,7 +275,6 @@ class Mistral3IntegrationTest(unittest.TestCase):
self.assertEqual(decoded_output, expected_output)
@require_read_token
@require_deterministic_for_xpu
def test_mistral3_integration_generate(self):
processor = AutoProcessor.from_pretrained(self.model_checkpoint)
processor.chat_template = processor.chat_template.replace('strftime_now("%Y-%m-%d")', '"2025-06-20"')
@ -300,7 +299,7 @@ class Mistral3IntegrationTest(unittest.TestCase):
expected_outputs = Expectations(
{
("xpu", 3): "The image features two tabby cats lying on a pink surface, which appears to be a cushion or",
("xpu", 3): "The image features two cats resting on a pink blanket. The cat on the left is a kitten",
("cuda", 8): 'The image features two cats lying on a pink surface, which appears to be a couch or a bed',
("rocm", (9, 4)): "The image features two cats lying on a pink surface, which appears to be a couch or a bed",
("rocm", (9, 5)): "The image features two tabby cats lying on a pink surface, which appears to be a cushion or"

View File

@ -146,7 +146,7 @@ class MLCDVisionModelIntegrationTest(unittest.TestCase):
@slow
def test_inference(self):
model_name = "DeepGlint-AI/mlcd-vit-bigG-patch14-448"
model = MLCDVisionModel.from_pretrained(model_name, attn_implementation="eager").to(torch_device)
model = MLCDVisionModel.from_pretrained(model_name).to(torch_device)
processor = AutoProcessor.from_pretrained(model_name)
# process single image

View File

@ -547,7 +547,7 @@ class MllamaForConditionalGenerationIntegrationTest(unittest.TestCase):
decoded_output = processor.decode(output[0], skip_special_tokens=True)
expected_outputs = Expectations(
{
("xpu", 3): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever",
("xpu", 3): "If I had to write a haiku about my life, I would write:\nLife is a messy tapestry\n Threads of joy and sorrow\nWeft of memories",
("cuda", 7): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever",
("cuda", 8): "If I had to write a haiku about my life, I would write:\nLife is a messy stream\nRipples of joy and pain\nFlowing, ever",
}

View File

@ -193,15 +193,7 @@ class RecurrentGemmaIntegrationTest(unittest.TestCase):
@require_bitsandbytes
@require_read_token
def test_model_2b_8bit(self):
# fmt: off
EXPECTED_TEXTS = Expectations(
{
("xpu", None): ['Hello I am doing a project on the topic of "The impact of the internet on the society" and I am stuck', "Hi today I'm going to show you how to make a simple and easy to make a 3D"],
(None, None): ['Hello I am doing a project on the topic of "The impact of social media on the society" and I am looking', "Hi today I'm going to show you how to make a simple and easy to make a 3D"],
}
)
# fmt: on
EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation()
EXPECTED_TEXTS = ['Hello I am doing a project on the topic of "The impact of social media on the society" and I am looking', "Hi today I'm going to show you how to make a simple and easy to make a 3D"] # fmt: skip
model = AutoModelForCausalLM.from_pretrained(
"gg-hf/recurrent-gemma-2b-hf",
@ -216,7 +208,7 @@ class RecurrentGemmaIntegrationTest(unittest.TestCase):
output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
self.assertEqual(output_text, EXPECTED_TEXT)
self.assertEqual(output_text, EXPECTED_TEXTS)
@require_read_token
def test_long_context(self):

View File

@ -23,7 +23,6 @@ from transformers import (
is_torch_available,
)
from transformers.testing_utils import (
Expectations,
cleanup,
require_torch,
slow,
@ -298,15 +297,9 @@ class VoxtralForConditionalGenerationIntegrationTest(unittest.TestCase):
outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500)
decoded_outputs = self.processor.batch_decode(outputs, skip_special_tokens=True)
# fmt: off
EXPECTED_OUTPUTS = Expectations(
{
(None, None): ["What can you tell me about this audio?This audio is a farewell address by President Barack Obama, delivered in Chicago. In the speech, he reflects on his eight years in office, highlighting the resilience, hope, and unity of the American people. He acknowledges the diverse perspectives and conversations he had with the public, which kept him honest and inspired. The president also emphasizes the importance of self-government and civic engagement, encouraging Americans to participate in their democracy actively. He expresses optimism about the country's future and looks forward to continuing his work as a citizen. The audio concludes with a heartfelt thank you and a blessing for the United States."],
("xpu", None): ["What can you tell me about this audio?This audio is a farewell address by President Barack Obama, delivered in Chicago. In the speech, he reflects on his eight years in office, highlighting the resilience, hope, and unity of the American people. He emphasizes the importance of self-government and active citizenship, encouraging listeners to engage in their communities and participate in democracy. The president expresses his optimism about the country's future and his commitment to continuing to serve as a citizen. He concludes the speech with a heartfelt thank you and a blessing for the United States."],
}
)
# fmt: on
EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()
EXPECTED_OUTPUT = [
"What can you tell me about this audio?This audio is a farewell address by President Barack Obama, delivered in Chicago. In the speech, he reflects on his eight years in office, highlighting the resilience, hope, and unity of the American people. He acknowledges the diverse perspectives and conversations he had with the public, which kept him honest and inspired. The president also emphasizes the importance of self-government and civic engagement, encouraging Americans to participate in their democracy actively. He expresses optimism about the country's future and looks forward to continuing his work as a citizen. The audio concludes with a heartfelt thank you and a blessing for the United States."
]
self.assertEqual(decoded_outputs, EXPECTED_OUTPUT)
@slow

View File

@ -35,7 +35,6 @@ from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
from transformers.pipelines.audio_utils import chunk_bytes_iter, ffmpeg_microphone_live
from transformers.pipelines.automatic_speech_recognition import chunk_iter
from transformers.testing_utils import (
Expectations,
compare_pipeline_output_to_hub_spec,
is_pipeline_test,
is_torch_available,
@ -1444,14 +1443,8 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@slow
def test_whisper_longform(self):
# fmt: off
EXPECTED_RESULTS = Expectations(
{
(None, None): " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!",
("xpu", None): " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting of classics, Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a Fisher shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I... APPLAUSE Sometimes I... Startle away, upside down on the monkey bars of a condemned playground on a superfund site. Get all heaped up on goofballs, rummaged that would discard a tag bag of defective toys, yank out a fist bowl of disembodied doll limbs, toss them on a stain kid's place mat from a defunct denys, set up a table inside a rusty cargo container down by the Wharf and challenge toothless drifters to the godless bug house blitz of tournament that is my segment.",
}
)
EXPECTED_RESULT = " Folks, if you watch the show, you know, I spent a lot of time right over there. Patiently and astutely scrutinizing the boxwood and mahogany chest set of the day's biggest stories developing the central headline pawns, definitely maneuvering an oso topical night to F6, fainting a classic Sicilian, nade door variation on the news, all the while seeing eight moves deep and patiently marshalling the latest press releases into a fisher's shows in Lip Nitsky attack that culminates in the elegant lethal slow-played, all-passant checkmate that is my nightly monologue. But sometimes, sometimes, folks, I. CHEERING AND APPLAUSE Sometimes I startle away, cubside down in the monkey bars of a condemned playground on a super fun site. Get all hept up on goofballs. Rummage that were discarded tag bag of defective toys. Yank out a fist bowl of disembodied doll limbs, toss them on Saturday, Rusty Cargo, container down by the Wharf, and challenge toothless drifters to the godless bughouse lets of tournament that is my segment. MUSIC Meanwhile!"
# fmt: on
EXPECTED_RESULT = EXPECTED_RESULTS.get_expectation()
processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")

View File

@ -12,68 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Run all tests: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py
# Run specific config: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py -k "2Proc"
# Run multiple configs: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py -k "2Proc or 4Proc"
# Run specific test: RUN_SLOW=1 pytest -v tests/tensor_parallel/test_tensor_parallel.py::TestTensorParallel2Proc::test_model_forward
# Run the test: CUDA_VISIBLE_DEVICES=0,1 RUN_SLOW=1 pytest -sv tests/tensor_parallel/test_tensor_parallel.py
import os
import tempfile
import warnings
import textwrap
from safetensors import safe_open
from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_available
from transformers import is_torch_available
from transformers.integrations.tensor_parallel import get_packed_weights, repack_weights
from transformers.testing_utils import (
TestCasePlus,
backend_device_count,
get_torch_dist_unique_port,
require_huggingface_hub_greater_or_equal,
require_torch_multi_accelerator,
torch_device,
torchrun,
)
if is_torch_available():
import torch
import torch.multiprocessing as mp
def global_wrapper(rank, func, tp, port, func_args, func_kwargs):
def setup_dist_env(rank, world_size, port):
os.environ["WORLD_SIZE"] = str(world_size)
os.environ["RANK"] = str(rank)
os.environ["LOCAL_RANK"] = str(rank)
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = str(port)
world_size = tp
setup_dist_env(rank, world_size, port)
if torch.cuda.is_available():
torch.cuda.set_device(rank)
torch.distributed.init_process_group(backend="nccl", rank=rank, world_size=world_size)
else:
torch.distributed.init_process_group(backend="gloo", rank=rank, world_size=world_size)
func(rank, *func_args, **func_kwargs)
torch.distributed.barrier()
torch.distributed.destroy_process_group()
def init_distributed(tp: int):
def _init_distributed(func):
def wrapper(*args, **kwargs):
world_size = tp
port = get_torch_dist_unique_port()
spawn_args = (func, tp, port, args, kwargs)
mp.spawn(global_wrapper, args=spawn_args, nprocs=world_size)
return wrapper
return _init_distributed
class TestTensorParallelUtils(TestCasePlus):
@ -105,9 +63,191 @@ class TestTensorParallelUtils(TestCasePlus):
assert torch.allclose(unpacked_weights, original_packed_weights)
class TestTensorParallel(TestCasePlus):
nproc_per_node = 2
def test_model_forward(self):
script_to_run = textwrap.dedent(
"""
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto")
torch.distributed.barrier()
has_dtensor = 0
for name, parameter in model.named_parameters():
if isinstance(parameter.data, torch.distributed.tensor.DTensor):
has_dtensor = 1
break
assert has_dtensor == 1, "TP model must have DTensor"
tokenizer = AutoTokenizer.from_pretrained(model_id, legacy=False)
prompt = "Can I help"
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model(inputs)
next_token_logits = outputs[0][:, -1, :]
next_token = torch.argmax(next_token_logits, dim=-1)
response = tokenizer.decode(next_token)
assert response == "with"
torch.distributed.barrier()
torch.distributed.destroy_process_group()
"""
)
torchrun(script_to_run, self.nproc_per_node, env=self.get_env())
def test_model_backward_pass(self):
script_to_run = textwrap.dedent(
"""
import torch
import os
from transformers import AutoModelForCausalLM
from torch import nn
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32, tp_plan="auto")
torch.distributed.barrier()
# Dummy forward and backward pass
# Note that loss.backward() will fail if there is a bug in the TP implementation
inputs = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device)
labels = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device)
loss = model(inputs, labels=labels).loss
loss.backward()
torch.distributed.barrier()
torch.distributed.destroy_process_group()
"""
)
torchrun(script_to_run, self.nproc_per_node, env=self.get_env())
def test_model_generate(self):
script_to_run = textwrap.dedent(
"""
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto")
torch.distributed.barrier()
model.forward = torch.compile(model.forward)
has_dtensor = 0
for name, parameter in model.named_parameters():
if isinstance(parameter.data, torch.distributed.tensor.DTensor):
has_dtensor = 1
break
assert has_dtensor == 1, "TP model must have DTensor"
tokenizer = AutoTokenizer.from_pretrained(model_id)
prompt = "Can I help"
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(inputs, max_new_tokens=10, cache_implementation="static")
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
assert output_text[0].startswith(prompt), f"Expected output to start with '{prompt}', got '{output_text[0]}'"
torch.distributed.barrier()
torch.distributed.destroy_process_group()
"""
)
torchrun(script_to_run, self.nproc_per_node, env=self.get_env())
@require_huggingface_hub_greater_or_equal("0.31.4")
def test_model_save(self):
from safetensors import safe_open
with tempfile.TemporaryDirectory() as tmp_dir:
for is_torchrun in [True, False]:
script_to_run = textwrap.dedent(
f"""
import torch
import os
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
kwargs = dict()
if os.environ.get("RANK", None) is not None:
kwargs["tp_plan"] = "auto"
result_dir = "{tmp_dir}/tp"
else:
result_dir = "{tmp_dir}/nontp"
model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
model.save_pretrained(result_dir)
"""
)
torchrun(script_to_run, self.nproc_per_node, is_torchrun=is_torchrun, env=self.get_env())
non_tp_model_path = os.path.join(tmp_dir, "nontp")
tp_model_path = os.path.join(tmp_dir, "tp")
for filename in os.listdir(non_tp_model_path):
if not filename.endswith(".safetensors"):
continue
non_tp_model = safe_open(os.path.join(non_tp_model_path, filename), device="cpu", framework="pt")
tp_model = safe_open(os.path.join(tp_model_path, filename), device="cpu", framework="pt")
for non_tp_key in non_tp_model.keys():
non_tp_tensor = non_tp_model.get_tensor(non_tp_key)
tp_tensor = tp_model.get_tensor(non_tp_key)
assert torch.allclose(non_tp_tensor, tp_tensor), f"Tensor with key: {non_tp_key} does not match"
del non_tp_tensor, tp_tensor
def test_custom_tp_plan(self):
script_to_run = textwrap.dedent(
r"""
import re
import torch
from torch.distributed.tensor import DTensor
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
# only shard attentions, but not mlps
tp_plan = {
"model.layers.*.self_attn.q_proj": "colwise",
"model.layers.*.self_attn.k_proj": "colwise",
"model.layers.*.self_attn.v_proj": "colwise",
"model.layers.*.self_attn.o_proj": "rowwise",
}
# Use custom tp_plan directly in from_pretrained
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, tp_plan=tp_plan)
# Check we can generate with the tp_plan
inputs = torch.randint(100, 200, (1, 10), device=model.device)
out = model.generate(inputs, max_new_tokens=10, do_sample=False)
# Check only the attentions are sharded
for name, param in model.named_parameters():
if re.search(r"\.self_attn\.(q|k|v|o)_proj\.", name):
assert isinstance(param, DTensor)
else:
assert not isinstance(param, DTensor)
"""
)
torchrun(script_to_run, self.nproc_per_node, env=self.get_env())
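The new tests in this class all follow the same shape: embed a standalone worker script with textwrap.dedent and hand it to the torchrun helper from transformers.testing_utils. A hypothetical extra test written against that template; the dtype assertion is illustrative and not part of this diff:
import textwrap
from transformers.testing_utils import TestCasePlus, torchrun
class TestTensorParallelSketch(TestCasePlus):
    nproc_per_node = 2
    def test_model_dtype(self):
        script_to_run = textwrap.dedent(
            """
            import torch
            from transformers import AutoModelForCausalLM
            model = AutoModelForCausalLM.from_pretrained("JackFram/llama-68m", dtype="auto", tp_plan="auto")
            torch.distributed.barrier()
            # Illustrative check only: the auto dtype should resolve to a floating type.
            assert next(model.parameters()).dtype in (torch.float16, torch.bfloat16, torch.float32)
            torch.distributed.barrier()
            torch.distributed.destroy_process_group()
            """
        )
        torchrun(script_to_run, self.nproc_per_node, env=self.get_env())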
class TestTensorParallelProperties(TestCasePlus):
def test_tp_plan_property_setter_getter(self):
"""Test that tp_plan property can be set and retrieved correctly."""
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
@ -135,6 +275,8 @@ class TestTensorParallelProperties(TestCasePlus):
def test_tp_plan_validation_invalid_style(self):
"""Test that invalid parallel styles are rejected."""
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
@ -147,6 +289,9 @@ class TestTensorParallelProperties(TestCasePlus):
def test_tp_plan_validation_nonexistent_layer_warning(self):
"""Test that warnings are issued for non-existent layer patterns."""
import warnings
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
@ -163,6 +308,10 @@ class TestTensorParallelProperties(TestCasePlus):
def test_tp_plan_valid_layer_patterns(self):
"""Test that valid layer patterns are accepted without warnings."""
import warnings
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
@ -198,6 +347,8 @@ class TestTensorParallelProperties(TestCasePlus):
def test_tp_plan_none_handling(self):
"""Test that None values are handled correctly."""
from transformers import AutoModelForCausalLM
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto")
@ -210,172 +361,6 @@ class TestTensorParallelProperties(TestCasePlus):
self.assertEqual(model.tp_plan, {"model.layers.*.self_attn.q_proj": "colwise"})
# ====== TEST FUNCTIONS ======
def _test_model_forward_impl(rank):
"""Implementation of test_model_forward for distributed execution."""
model_id = "JackFram/llama-68m"
int(os.environ["RANK"])
int(os.environ["WORLD_SIZE"])
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto")
torch.distributed.barrier()
has_dtensor = 0
for name, parameter in model.named_parameters():
if isinstance(parameter.data, torch.distributed.tensor.DTensor):
has_dtensor = 1
break
assert has_dtensor == 1, "TP model must have DTensor"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
prompt = "Can I help"
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model(inputs)
next_token_logits = outputs[0][:, -1, :]
next_token = torch.argmax(next_token_logits, dim=-1)
response = tokenizer.decode(next_token)
assert response == "with"
print("response:", response)
torch.distributed.barrier()
def _test_model_backward_pass_impl(rank):
"""Implementation of test_model_backward_pass for distributed execution."""
model_id = "JackFram/llama-68m"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32, tp_plan="auto")
torch.distributed.barrier()
# Dummy forward and backward pass
# Note that loss.backward() will fail if there is a bug in the TP implementation
inputs = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device)
labels = torch.randint(0, model.config.vocab_size, (2, 10), device=model.device)
loss = model(inputs, labels=labels).loss
loss.backward()
torch.distributed.barrier()
def _test_model_generate_impl(rank):
"""Implementation of test_model_generate for distributed execution."""
model_id = "JackFram/llama-68m"
int(os.environ["RANK"])
int(os.environ["WORLD_SIZE"])
model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", tp_plan="auto")
torch.distributed.barrier()
model.forward = torch.compile(model.forward)
has_dtensor = 0
for name, parameter in model.named_parameters():
if isinstance(parameter.data, torch.distributed.tensor.DTensor):
has_dtensor = 1
break
assert has_dtensor == 1, "TP model must have DTensor"
tokenizer = AutoTokenizer.from_pretrained(model_id)
prompt = "Can I help"
inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(inputs, max_new_tokens=10, cache_implementation="static")
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
assert output_text[0].startswith(prompt), f"Expected output to start with '{prompt}', got '{output_text[0]}'"
torch.distributed.barrier()
def _test_model_save_impl(rank, tmp_dir, is_torchrun):
"""Implementation of test_model_save for distributed execution."""
model_id = "JackFram/llama-68m"
kwargs = {}
if os.environ.get("RANK", None) is not None:
kwargs["tp_plan"] = "auto"
result_dir = f"{tmp_dir}/tp"
else:
result_dir = f"{tmp_dir}/nontp"
model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)
model.save_pretrained(result_dir)
class TestTensorParallelBase(TestCasePlus):
"""Base class for tensor parallel tests. Subclasses must set nproc_per_node."""
nproc_per_node = None
@require_torch_multi_accelerator
def test_model_forward(self):
if self.nproc_per_node is None:
self.skipTest("nproc_per_node not set")
if backend_device_count(torch_device) < self.nproc_per_node:
self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}")
init_distributed(tp=self.nproc_per_node)(_test_model_forward_impl)()
@require_torch_multi_accelerator
def test_model_backward_pass(self):
if self.nproc_per_node is None:
self.skipTest("nproc_per_node not set")
if backend_device_count(torch_device) < self.nproc_per_node:
self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}")
init_distributed(tp=self.nproc_per_node)(_test_model_backward_pass_impl)()
@require_torch_multi_accelerator
def test_model_generate(self):
if self.nproc_per_node is None:
self.skipTest("nproc_per_node not set")
if backend_device_count(torch_device) < self.nproc_per_node:
self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}")
init_distributed(tp=self.nproc_per_node)(_test_model_generate_impl)()
@require_huggingface_hub_greater_or_equal("0.31.4")
@require_torch_multi_accelerator
def test_model_save(self):
if self.nproc_per_node is None:
self.skipTest("nproc_per_node not set")
if backend_device_count(torch_device) < self.nproc_per_node:
self.skipTest(f"Need at least {self.nproc_per_node} devices, have {backend_device_count(torch_device)}")
with tempfile.TemporaryDirectory() as tmp_dir:
# First run with TP (distributed)
init_distributed(tp=self.nproc_per_node)(_test_model_save_impl)(tmp_dir, True)
# Then run without TP (non-distributed)
_test_model_save_impl(0, tmp_dir, False)
non_tp_model_path = os.path.join(tmp_dir, "nontp")
tp_model_path = os.path.join(tmp_dir, "tp")
for filename in os.listdir(non_tp_model_path):
if not filename.endswith(".safetensors"):
continue
non_tp_model = safe_open(os.path.join(non_tp_model_path, filename), device="cpu", framework="pt")
tp_model = safe_open(os.path.join(tp_model_path, filename), device="cpu", framework="pt")
for non_tp_key in non_tp_model.keys():
non_tp_tensor = non_tp_model.get_tensor(non_tp_key)
tp_tensor = tp_model.get_tensor(non_tp_key)
assert torch.allclose(non_tp_tensor, tp_tensor), f"Tensor with key: {non_tp_key} does not match"
del non_tp_tensor, tp_tensor
class TestTensorParallel2Proc(TestTensorParallelBase):
"""Test tensor parallel with 2 processes."""
nproc_per_node = 2
class TestTensorParallel4Proc(TestTensorParallelBase):
"""Test tensor parallel with 4 processes."""
nproc_per_node = 4
@require_torch_multi_accelerator
class TestTensorParallelAccelerator(TestTensorParallel):
nproc_per_node = backend_device_count(torch_device)

View File

@ -37,9 +37,8 @@ from transformers.testing_utils import (
from transformers.utils import is_torch_available, is_vision_available
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(os.path.join(parent_dir, "utils"))
from fetch_hub_objects_for_ci import url_to_local_path # noqa: E402
sys.path.append(".")
from utils.fetch_hub_objects_for_ci import url_to_local_path
global_rng = random.Random()

View File

@ -799,9 +799,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
# Test 2:
# without tokenize
self.assertEqual(
self.tokenizer.apply_chat_template(conversation, tokenize=True).input_ids, expected_tokenized.tokens
)
self.assertEqual(self.tokenizer.apply_chat_template(conversation, tokenize=True), expected_tokenized.tokens)
with self.assertRaises(
ValueError, msg="Kwargs [unk_args] are not supported by `MistralCommonTokenizer.apply_chat_template`."
@ -826,7 +824,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
expected_tokenized.text,
)
self.assertEqual(
self.tokenizer.apply_chat_template(conversation, tokenize=True, continue_final_message=True).input_ids,
self.tokenizer.apply_chat_template(conversation, tokenize=True, continue_final_message=True),
expected_tokenized.tokens,
)
@ -848,7 +846,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
token_outputs = self.tokenizer.apply_chat_template(
conversation, tokenize=True, add_generation_prompt=add_generation_prompt
)
self.assertEqual(token_outputs.input_ids, expected_tokenized.tokens)
self.assertEqual(token_outputs, expected_tokenized.tokens)
# Test 2:
# with continue_final_message
@ -960,16 +958,18 @@ class TestMistralCommonTokenizer(unittest.TestCase):
},
]
output = self.tokenizer.apply_chat_template(conversation).input_ids
output = self.tokenizer.apply_chat_template(conversation, tokenize=True)
self.assertEqual(output, expected_tokenized.tokens)
output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True)
output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True, return_dict=True)
self.assertEqual(output_dict["input_ids"], expected_tokenized.tokens)
self.assertEqual(len(output_dict["pixel_values"]), len(expected_tokenized.images))
for o, e in zip(output_dict["pixel_values"], expected_tokenized.images):
self.assertTrue(np.allclose(o, e))
output_dict = self.tokenizer.apply_chat_template(conversation, tokenize=True, return_tensors="pt")
output_dict = self.tokenizer.apply_chat_template(
conversation, tokenize=True, return_dict=True, return_tensors="pt"
)
self.assertEqual(output_dict["input_ids"].tolist()[0], expected_tokenized.tokens)
expected_images_pt_tensor = torch.from_numpy(np.stack(expected_tokenized.images))
self.assertTrue(torch.allclose(output_dict["pixel_values"], expected_images_pt_tensor))
@ -1013,7 +1013,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
},
]
output = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True).input_ids
output = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True)
self.assertEqual(output, expected_tokenized.tokens)
output_dict = self.tokenizer_audio.apply_chat_template(conversation, tokenize=True, return_dict=True)
@ -1041,14 +1041,14 @@ class TestMistralCommonTokenizer(unittest.TestCase):
# Test 1:
# with truncation
self.assertEqual(
self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=True, max_length=20).input_ids,
self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=True, max_length=20),
expected_tokenized.tokens[:20],
)
# Test 2:
# without truncation
self.assertEqual(
self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=False, max_length=20).input_ids,
self.tokenizer.apply_chat_template(conversation, tokenize=True, truncation=False, max_length=20),
expected_tokenized.tokens,
)
@ -1130,7 +1130,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
]
text_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=False)
token_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=True).input_ids
token_outputs = self.tokenizer.apply_chat_template(conversations, tools=tools, tokenize=True)
self.assertEqual(len(text_outputs), len(token_outputs))
self.assertEqual(len(text_outputs), len(expected_tokenized))
@ -1202,7 +1202,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
ChatCompletionRequest.from_openai(ref_conversation)
)
output = self.tokenizer.apply_chat_template(conversations, tokenize=True).input_ids
output = self.tokenizer.apply_chat_template(conversations, tokenize=True)
self.assertEqual(output, [expected_tokenized.tokens] * 3)
output = self.tokenizer.apply_chat_template(conversations, tokenize=True, return_dict=True)
@ -1248,9 +1248,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
for conversation in conversations
]
token_outputs = self.tokenizer.apply_chat_template(
conversations, tokenize=True, continue_final_message=True
).input_ids
token_outputs = self.tokenizer.apply_chat_template(conversations, tokenize=True, continue_final_message=True)
for output, expected in zip(token_outputs, expected_tokenized):
self.assertEqual(output, expected.tokens)
@ -1299,7 +1297,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
]
token_outputs = self.tokenizer.apply_chat_template(
conversations, tokenize=True, add_generation_prompt=add_generation_prompt
).input_ids
)
for output, expected in zip(token_outputs, expected_tokenized):
self.assertEqual(output, expected.tokens)
@ -1333,7 +1331,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
# with truncation
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, tokenize=True, truncation=True, max_length=20
).input_ids
)
for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
self.assertEqual(output, expected.tokens[:20])
@ -1342,7 +1340,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
# without truncation
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, tokenize=True, truncation=False, max_length=20
).input_ids
)
self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
self.assertEqual(output, expected.tokens)
@ -1360,9 +1358,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
for padding in [True, "max_length", PaddingStrategy.LONGEST, PaddingStrategy.MAX_LENGTH]:
if padding == PaddingStrategy.MAX_LENGTH:
# No padding if no max length is provided
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, padding=padding, return_dict=False
)
token_outputs = self.tokenizer.apply_chat_template(self.fixture_conversations, padding=padding)
self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
self.assertEqual(output, expected.tokens)
@ -1370,7 +1366,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
max_length = 20 if padding == PaddingStrategy.MAX_LENGTH else None
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, tokenize=True, padding=padding, max_length=max_length, return_dict=False
self.fixture_conversations, tokenize=True, padding=padding, max_length=max_length
)
if padding != PaddingStrategy.MAX_LENGTH:
@ -1394,7 +1390,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
for padding in [False, "do_not_pad", PaddingStrategy.DO_NOT_PAD]:
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, tokenize=True, padding=padding, return_dict=False
self.fixture_conversations, tokenize=True, padding=padding
)
self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
@ -1406,12 +1402,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
max_length = 20
for padding in [True, "max_length", PaddingStrategy.LONGEST, PaddingStrategy.MAX_LENGTH]:
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations,
tokenize=True,
truncation=True,
padding=padding,
max_length=max_length,
return_dict=False,
self.fixture_conversations, tokenize=True, truncation=True, padding=padding, max_length=max_length
)
self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
@ -1420,12 +1411,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
)
for padding in [False, "do_not_pad", PaddingStrategy.DO_NOT_PAD]:
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations,
tokenize=True,
truncation=True,
padding=padding,
max_length=max_length,
return_dict=False,
self.fixture_conversations, tokenize=True, truncation=True, padding=padding, max_length=max_length
)
self.assertEqual(len(token_outputs), len(self.tokenized_fixture_conversations))
for output, expected in zip(token_outputs, self.tokenized_fixture_conversations):
@ -1435,7 +1421,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
# Test 1:
# with tokenize
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, tokenize=True, return_tensors="pt", padding=True, return_dict=False
self.fixture_conversations, tokenize=True, return_tensors="pt", padding=True
)
self.assertIsInstance(token_outputs, torch.Tensor)
self.assertEqual(
@ -1446,7 +1432,7 @@ class TestMistralCommonTokenizer(unittest.TestCase):
# Test 2:
# without tokenize, should ignore return_tensors
token_outputs = self.tokenizer.apply_chat_template(
self.fixture_conversations, tokenize=False, return_tensors="pt", padding=True, return_dict=False
self.fixture_conversations, tokenize=False, return_tensors="pt", padding=True
)
self.assertEqual(token_outputs, [t.text for t in self.tokenized_fixture_conversations])
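Note (editor's sketch, not part of the diff): the padding/truncation cases above all exercise the same batched apply_chat_template call pattern. A minimal illustration follows; the checkpoint id and the sample conversations are placeholders rather than the suite's fixtures, and the comments only restate what the assertions above check.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")  # placeholder checkpoint
conversations = [
    [{"role": "user", "content": "Hello!"}],
    [{"role": "user", "content": "What is the capital of France?"}],
]

# tokenize=False -> one rendered string per conversation (return_tensors is ignored on this path)
texts = tok.apply_chat_template(conversations, tokenize=False)

# tokenize=True -> one token-id list per conversation; this is what the tests compare
# against the pre-tokenized fixture conversations
ids = tok.apply_chat_template(conversations, tokenize=True)

# padding/truncation act on the tokenized path; return_tensors="pt" packs the padded batch
# into a single torch.Tensor, matching the isinstance assertion above
batch = tok.apply_chat_template(
    conversations, tokenize=True, padding=True, truncation=True, max_length=20, return_tensors="pt"
)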

View File

@ -323,7 +323,7 @@ class TokenizerUtilsTest(unittest.TestCase):
]
# First, test the default case, where we encode the whole conversation at once
whole_conversation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True, return_dict=False)
whole_conversation_tokens = tokenizer.apply_chat_template(conversation, tokenize=True)
# Now, test the message-by-message encoding
tokens = []

View File

@ -200,40 +200,6 @@ class ChatSchemaParserTest(unittest.TestCase):
tokenizer_parsed_chat = tokenizer.parse_response(model_out)
        self.assertEqual(tokenizer_parsed_chat, parsed_chat)

    def test_batched_inputs(self):
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>'
tokenizer.response_schema = cohere_schema
parsed_chat = tokenizer.parse_response(model_out)
self.assertEqual(tokenizer.parse_response([model_out]), [parsed_chat])
        self.assertEqual(tokenizer.parse_response([model_out] * 2), [parsed_chat] * 2)

    def test_token_id_inputs(self):
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2") # Need an actual tokenizer to encode
model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>'
tokenizer.response_schema = cohere_schema
parsed_chat = tokenizer.parse_response(model_out)
tokenized_out = tokenizer(model_out).input_ids
self.assertEqual(tokenizer.parse_response(tokenized_out), parsed_chat)
self.assertEqual(tokenizer.parse_response([tokenized_out]), [parsed_chat])
        self.assertEqual(tokenizer.parse_response([tokenized_out] * 2), [parsed_chat] * 2)

    def test_numpy_inputs(self):
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2") # Need an actual tokenizer to encode
model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>'
tokenizer.response_schema = cohere_schema
parsed_chat = tokenizer.parse_response(model_out)
tokenized_out = tokenizer(model_out, return_tensors="np").input_ids
        self.assertEqual(tokenizer.parse_response(tokenized_out), [parsed_chat])

    def test_tensor_inputs(self):
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2") # Need an actual tokenizer to encode
model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>'
tokenizer.response_schema = cohere_schema
parsed_chat = tokenizer.parse_response(model_out)
tokenized_out = tokenizer(model_out, return_tensors="pt").input_ids
        self.assertEqual(tokenizer.parse_response(tokenized_out), [parsed_chat])

    def test_cohere_template(self):
model_out = '<|START_THINKING|>I should call a tool.<|END_THINKING|><|START_ACTION|>[\n {"tool_call_id": "0", "tool_name": "simple_tool", "parameters": {"temperature_format": "Celsius"}}\n]<|END_ACTION|><|END_OF_TURN_TOKEN|>'
parsed_chat = recursive_parse(model_out, cohere_schema)
@ -315,7 +281,6 @@ class ChatSchemaParserTest(unittest.TestCase):
self.assertEqual(
parsed_chat,
{
"role": "assistant",
"thinking": 'Okay, the user said, "Hello! How are you?" I need to respond appropriately. Since this is the first message, I should greet them back and ask how I can assist. I should keep it friendly and open-ended. Let me make sure the response is welcoming and encourages them to share what they need help with. I\'ll avoid any technical jargon and keep it simple. Let me check for any typos and ensure the tone is positive.',
"tool_calls": [
{
@ -337,10 +302,9 @@ class ChatSchemaParserTest(unittest.TestCase):
self.assertEqual(
parsed_chat,
{
"role": "assistant",
"tool_calls": [
{"type": "function", "function": {"name": "get_weather", "arguments": {"city": "Paris"}}}
],
]
},
)
@ -350,7 +314,6 @@ class ChatSchemaParserTest(unittest.TestCase):
self.assertEqual(
parsed_chat,
{
"role": "assistant",
"content": "Some content about gravity goes here but I'm cutting it off to make this shorter!",
"thinking": 'Okay, the user asked, "Hey! Can you tell me about gravity?" Let me start by breaking down what they might be looking for. They probably want a basic understanding of gravity, maybe for a school project or just personal curiosity. I should explain what gravity is, how it works, and maybe some examples.',
},
@ -362,7 +325,6 @@ class ChatSchemaParserTest(unittest.TestCase):
self.assertEqual(
parsed_chat,
{
"role": "assistant",
"tool_calls": [
{
"type": "function",
@ -374,6 +336,6 @@ class ChatSchemaParserTest(unittest.TestCase):
},
},
}
],
]
},
)
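Note (editor's sketch, not part of the diff): the assertion edits in this file consistently drop the "role": "assistant" entry from the dicts the parser is expected to return. The shape the updated assertions check for, with abridged placeholder values, is roughly:

expected = {
    "thinking": "...reasoning text extracted from the <|START_THINKING|> block...",  # abridged placeholder
    "tool_calls": [
        {"type": "function", "function": {"name": "get_weather", "arguments": {"city": "Paris"}}}
    ],
}
# Other cases keep a "content" entry instead of (or alongside) "thinking"; none of the
# expected dicts carry a "role" key any more.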

View File

@ -1407,10 +1407,7 @@ if __name__ == "__main__":
if not os.path.isdir(os.path.join(os.getcwd(), f"ci_results_{job_name}")):
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
nvidia_daily_ci_workflow = (
"huggingface/transformers/.github/workflows/self-scheduled-caller.yml",
"huggingface/transformers/.github/workflows/self-scheduled-flash-attn-caller.yml",
)
nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
amd_daily_ci_workflows = (
"huggingface/transformers/.github/workflows/self-scheduled-amd-mi325-caller.yml",
"huggingface/transformers/.github/workflows/self-scheduled-amd-mi355-caller.yml",