Mirror of https://github.com/huggingface/transformers.git, synced 2025-11-06 13:34:37 +08:00

Compare commits: trigger-remove-script-datasets-in-tests ... allow_ci_to_use_a10 (20 commits)
Commits (SHA1 only; author and date were not captured):

- 5e54865c88
- 0cf5408dea
- 8e6fa6b59f
- e304be32e9
- b898e54c73
- 7e00e28ccb
- a02f0a871e
- 8b37093b17
- d6ebe4d6ec
- 69f5fa1940
- 6dc035bd97
- 741f6632d5
- c593e135cb
- a436e29cae
- b4b503befd
- d0e5cea195
- 8f16ac0fae
- e336a60875
- c02d17e00d
- 56b91f3314
.github/workflows/model_jobs.yml (vendored, 6 changed lines)

@@ -12,8 +12,8 @@ on:
       slice_id:
         required: true
         type: number
-      runner:
-        required: true
+      runner_map:
+        required: false
         type: string
       docker:
         required: true

@@ -45,7 +45,7 @@ jobs:
       matrix:
         folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
     runs-on:
-      group: '${{ inputs.machine_type }}'
+      group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
     container:
       image: ${{ inputs.docker }}
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
.github/workflows/model_jobs_amd.yml (vendored, 128 lines, file deleted)

@@ -1,128 +0,0 @@
-name: model jobs
-
-on:
-  workflow_call:
-    inputs:
-      folder_slices:
-        required: true
-        type: string
-      machine_type:
-        required: true
-        type: string
-      slice_id:
-        required: true
-        type: number
-      runner:
-        required: true
-        type: string
-      docker:
-        required: true
-        type: string
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  run_models_gpu:
-    name: " "
-    strategy:
-      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
-    runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
-    container:
-      image: ${{ inputs.docker }}
-      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ inputs.folder_slices }}"
-          echo "${{ matrix.folders }}"
-          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Update clone
-        working-directory: /transformers
-        run: git fetch && git checkout ${{ github.sha }}
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install -U datasets
-
-      - name: Update / Install some packages (for Past CI)
-        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
-        working-directory: /transformers
-        run: |
-          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
-
-      - name: ROCM-SMI
-        run: |
-          rocm-smi
-
-      - name: ROCM-INFO
-        run: |
-          rocminfo | grep "Agent" -A 14
-
-      - name: Show ROCR environment
-        run: |
-          echo "ROCR: $ROCR_VISIBLE_DEVICES"
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Run test
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
.github/workflows/self-scheduled-caller.yml (vendored, 68 changed lines)

@@ -7,7 +7,7 @@ on:
     - cron: "17 2 * * *"
   push:
     branches:
-      - trigger-remove-script-datasets-in-tests
+      - allow_ci_to_use_a10
   workflow_dispatch:
     inputs:
       prev_workflow_run_id:

@@ -22,10 +22,10 @@ on:
         default: ""


-# Used for `push` to easily modiffy the target workflow runs to compare against
+# Used for `push` to easily modify the target workflow runs to compare against
 env:
   prev_workflow_run_id: ""
-  other_workflow_run_id: "15770139098"
+  other_workflow_run_id: ""


 jobs:

@@ -51,8 +51,68 @@ jobs:
     with:
       job: run_models_gpu
      slack_report_channel: "#transformers-ci-daily-models"
-      runner: daily-ci
      docker: huggingface/transformers-all-latest-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
    secrets: inherit

+# torch-pipeline:
+#   name: Torch pipeline CI
+#   uses: ./.github/workflows/self-scheduled.yml
+#   with:
+#     job: run_pipelines_torch_gpu
+#     slack_report_channel: "#transformers-ci-daily-pipeline-torch"
+#     runner: daily-ci
+#     docker: huggingface/transformers-pytorch-gpu
+#     ci_event: Daily CI
+#     report_repo_id: hf-internal-testing/transformers_daily_ci
+#   secrets: inherit
+#
+# example-ci:
+#   name: Example CI
+#   uses: ./.github/workflows/self-scheduled.yml
+#   with:
+#     job: run_examples_gpu
+#     slack_report_channel: "#transformers-ci-daily-examples"
+#     runner: daily-ci
+#     docker: huggingface/transformers-all-latest-gpu
+#     ci_event: Daily CI
+#     report_repo_id: hf-internal-testing/transformers_daily_ci
+#   secrets: inherit
+#
+# trainer-fsdp-ci:
+#   name: Trainer/FSDP CI
+#   uses: ./.github/workflows/self-scheduled.yml
+#   with:
+#     job: run_trainer_and_fsdp_gpu
+#     slack_report_channel: "#transformers-ci-daily-training"
+#     runner: daily-ci
+#     docker: huggingface/transformers-all-latest-gpu
+#     ci_event: Daily CI
+#     report_repo_id: hf-internal-testing/transformers_daily_ci
+#   secrets: inherit
+#
+# deepspeed-ci:
+#   name: DeepSpeed CI
+#   uses: ./.github/workflows/self-scheduled.yml
+#   with:
+#     job: run_torch_cuda_extensions_gpu
+#     slack_report_channel: "#transformers-ci-daily-training"
+#     runner: daily-ci
+#     docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
+#     ci_event: Daily CI
+#     working-directory-prefix: /workspace
+#     report_repo_id: hf-internal-testing/transformers_daily_ci
+#   secrets: inherit
+#
+# quantization-ci:
+#   name: Quantization CI
+#   uses: ./.github/workflows/self-scheduled.yml
+#   with:
+#     job: run_quantization_torch_gpu
+#     slack_report_channel: "#transformers-ci-daily-quantization"
+#     runner: daily-ci
+#     docker: huggingface/transformers-quantization-latest-gpu
+#     ci_event: Daily CI
+#     report_repo_id: hf-internal-testing/transformers_daily_ci
+#   secrets: inherit
.github/workflows/self-scheduled.yml (vendored, 10 changed lines)

@@ -15,9 +15,6 @@ on:
      slack_report_channel:
        required: true
        type: string
-      runner:
-        required: true
-        type: string
      docker:
        required: true
        type: string

@@ -62,6 +59,7 @@ jobs:
    outputs:
      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
+      runner_map: ${{ steps.set-matrix.outputs.runner_map }}
      quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
    steps:
      - name: Update clone

@@ -88,6 +86,7 @@ jobs:
          if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
            echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
            echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
+            echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
          elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
            echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
            echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT

@@ -111,14 +110,14 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
+        machine_type: [single-gpu, multi-gpu]
        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
    uses: ./.github/workflows/model_jobs.yml
    with:
      folder_slices: ${{ needs.setup.outputs.folder_slices }}
      machine_type: ${{ matrix.machine_type }}
      slice_id: ${{ matrix.slice_id }}
-      runner: ${{ inputs.runner }}
+      runner_map: ${{ needs.setup.outputs.runner_map }}
      docker: ${{ inputs.docker }}
    secrets: inherit

@@ -136,7 +135,6 @@ jobs:
      folder_slices: ${{ needs.setup.outputs.folder_slices }}
      machine_type: ${{ matrix.machine_type }}
      slice_id: ${{ matrix.slice_id }}
-      runner: ${{ inputs.runner }}
      docker: ${{ inputs.docker }}
      report_name_prefix: run_trainer_and_fsdp_gpu
    secrets: inherit
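The `runs-on.group` expression in model_jobs.yml above, `fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type]`, implies that `utils/get_runner_map.py` (not part of this compare) prints a JSON object keyed first by test folder and then by machine type. A minimal sketch of output in that shape; the folder names and the second pair of runner groups are assumptions for illustration only:

```python
import json

# Hypothetical mapping: one runner group per (test folder, machine type) pair.
# The "aws-g4dn-*-cache" groups are the ones previously hard-coded in
# self-scheduled.yml; the g5 entries are made up to show folders can differ.
runner_map = {
    "models/bert": {
        "single-gpu": "aws-g4dn-4xlarge-cache",
        "multi-gpu": "aws-g4dn-12xlarge-cache",
    },
    "models/llama": {
        "single-gpu": "aws-g5-4xlarge-cache",
        "multi-gpu": "aws-g5-12xlarge-cache",
    },
}
print(json.dumps(runner_map))
```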
Makefile (18 changed lines)

@@ -8,19 +8,13 @@ check_dirs := examples tests src utils
 exclude_folders := ""

 modified_only_fixup:
-	@current_branch=$$(git branch --show-current); \
-	if [ "$$current_branch" = "main" ]; then \
-		echo "On main branch, running 'style' target instead..."; \
-		$(MAKE) style; \
+	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
+	@if test -n "$(modified_py_files)"; then \
+		echo "Checking/fixing $(modified_py_files)"; \
+		ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
+		ruff format $(modified_py_files) --exclude $(exclude_folders);\
 	else \
-		modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
-		if [ -n "$$modified_py_files" ]; then \
-			echo "Checking/fixing files: $${modified_py_files}"; \
-			ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
-			ruff format $${modified_py_files} --exclude $(exclude_folders); \
-		else \
-			echo "No library .py files were modified"; \
-		fi; \
+		echo "No library .py files were modified"; \
 	fi

 # Update src/transformers/dependency_versions_table.py
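Not part of the diff: a rough Python equivalent of the `+` side of the `modified_only_fixup` recipe, assuming `utils/get_modified_files.py` prints a space-separated list of changed `.py` files (which is how the recipe consumes its output):

```python
import subprocess

CHECK_DIRS = ["examples", "tests", "src", "utils"]

# Ask the repo helper which library .py files were modified.
result = subprocess.run(
    ["python", "utils/get_modified_files.py", *CHECK_DIRS],
    capture_output=True, text=True, check=True,
)
modified_py_files = result.stdout.split()

if modified_py_files:
    print(f"Checking/fixing {' '.join(modified_py_files)}")
    subprocess.run(["ruff", "check", *modified_py_files, "--fix"], check=True)
    subprocess.run(["ruff", "format", *modified_py_files], check=True)
else:
    print("No library .py files were modified")
```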
@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:

 ```python
 >>> # let's load an audio sample from an Arabic speech corpus
 >>> from datasets import load_dataset
->>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
+>>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
 >>> audio_sample = next(iter(dataset))["audio"]

 >>> # now, process it

(The same hunk appears in a second documentation file.)

@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:

 ```python
 >>> # let's load an audio sample from an Arabic speech corpus
 >>> from datasets import load_dataset
->>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
+>>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
 >>> audio_sample = next(iter(dataset))["audio"]

 >>> # now, process it
@@ -493,33 +493,6 @@ training_args = TrainingArguments(
 )
 ```

-You can also configure which specific kernels to apply using the `liger_kernel_config` parameter. This dict is passed as keyword arguments to the `_apply_liger_kernel_to_instance` function, allowing fine-grained control over kernel usage. Available options vary by model but typically include: `rope`, `swiglu`, `cross_entropy`, `fused_linear_cross_entropy`, `rms_norm`, etc.
-
-```py
-from transformers import TrainingArguments
-
-# Apply only specific kernels
-training_args = TrainingArguments(
-    output_dir="your-model",
-    learning_rate=2e-5,
-    per_device_train_batch_size=16,
-    per_device_eval_batch_size=16,
-    num_train_epochs=2,
-    weight_decay=0.01,
-    eval_strategy="epoch",
-    save_strategy="epoch",
-    load_best_model_at_end=True,
-    push_to_hub=True,
-    use_liger_kernel=True,
-    liger_kernel_config={
-        "rope": True,
-        "cross_entropy": True,
-        "rms_norm": False,  # Don't apply Liger's RMSNorm kernel
-        "swiglu": True,
-    }
-)
-```
-
 ### NEFTune

 [NEFTune](https://hf.co/papers/2310.05914) adds noise to the embedding vectors during training to improve model performance. Enable it in [`Trainer`] with the `neftune_noise_alpha` parameter in [`TrainingArguments`] to control how much noise is added.
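The NEFTune paragraph above is unchanged on both sides of the diff; for reference, a minimal sketch of enabling it (the `neftune_noise_alpha` value is arbitrary and chosen only for illustration):

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="your-model",
    neftune_noise_alpha=0.1,  # strength of the noise added to embedding vectors
)
```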
@@ -264,6 +264,7 @@ class ExamplesTests(TestCasePlus):
            --dataset_config clean
            --train_split_name validation
            --eval_split_name validation
+            --trust_remote_code
            --output_dir {tmp_dir}
            --overwrite_output_dir
            --num_train_epochs=2

@@ -312,6 +312,7 @@ class ExamplesTestsNoTrainer(TestCasePlus):
            {self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
+            --trust_remote_code
            --learning_rate 1e-4
            --per_device_train_batch_size 2
            --per_device_eval_batch_size 1

@@ -390,6 +390,7 @@ class ExamplesTests(TestCasePlus):
            --output_dir {tmp_dir}
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
+            --trust_remote_code
            --do_train
            --do_eval
            --learning_rate 1e-4

@@ -423,6 +424,7 @@ class ExamplesTests(TestCasePlus):
            --dataset_config_name clean
            --train_split_name validation
            --eval_split_name validation
+            --trust_remote_code
            --do_train
            --do_eval
            --learning_rate 1e-4

@@ -453,6 +455,7 @@ class ExamplesTests(TestCasePlus):
            --dataset_config_name clean
            --train_split_name validation
            --eval_split_name validation
+            --trust_remote_code
            --do_train
            --do_eval
            --learning_rate 1e-4

@@ -485,6 +488,7 @@ class ExamplesTests(TestCasePlus):
            --dataset_config_name clean
            --train_split_name validation
            --eval_split_name validation
+            --trust_remote_code
            --do_train
            --do_eval
            --learning_rate 1e-4

@@ -512,6 +516,7 @@ class ExamplesTests(TestCasePlus):
            --output_dir {tmp_dir}
            --model_name_or_path hf-internal-testing/tiny-random-wav2vec2
            --dataset_name anton-l/superb_demo
+            --trust_remote_code
            --dataset_config_name ks
            --train_split_name test
            --eval_split_name test

@@ -546,6 +551,7 @@ class ExamplesTests(TestCasePlus):
            --dataset_name hf-internal-testing/librispeech_asr_dummy
            --dataset_config_names clean
            --dataset_split_names validation
+            --trust_remote_code
            --learning_rate 1e-4
            --per_device_train_batch_size 4
            --per_device_eval_batch_size 4

@@ -566,6 +572,7 @@ class ExamplesTests(TestCasePlus):
            run_mae.py
            --output_dir {tmp_dir}
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
+            --trust_remote_code
            --do_train
            --do_eval
            --learning_rate 1e-4

@@ -315,6 +315,7 @@ class ExamplesTests(TestCasePlus):
        testargs = f"""
            run_image_classification.py
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
+            --trust_remote_code
            --model_name_or_path microsoft/resnet-18
            --do_train
            --do_eval
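Every hunk above adds the same `--trust_remote_code` flag; the example scripts forward it to `datasets.load_dataset`, which needs it for datasets that still ship a loading script. A minimal sketch using one of the datasets referenced in these tests (whether a given dataset still needs the flag depends on the `datasets` version and on whether the dataset has since been converted to Parquet):

```python
from datasets import load_dataset

ds = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy",
    "clean",
    split="validation",
    trust_remote_code=True,  # opt in to running the dataset's loading script
)
print(ds[0]["audio"]["array"][:5])
```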
@@ -29,6 +29,7 @@ import warnings
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from contextlib import contextmanager
+from dataclasses import dataclass
 from enum import Enum
 from functools import partial, wraps
 from threading import Thread

@@ -40,6 +41,7 @@ from huggingface_hub import split_torch_state_dict_into_shards
 from packaging import version
 from torch import Tensor, nn
 from torch.distributions import constraints
+from torch.nn import CrossEntropyLoss, Identity
 from torch.utils.checkpoint import checkpoint

 from transformers.utils import is_torchao_available

@@ -48,6 +50,7 @@ from transformers.utils import is_torchao_available
 if is_torchao_available():
     from torchao.quantization import Int4WeightOnlyConfig

+from .activations import get_activation
 from .configuration_utils import PretrainedConfig
 from .dynamic_module_utils import custom_object_save
 from .generation import CompileConfig, GenerationConfig

@@ -95,6 +98,7 @@ from .utils import (
     WEIGHTS_INDEX_NAME,
     WEIGHTS_NAME,
     ContextManagers,
+    ModelOutput,
     PushToHubMixin,
     cached_file,
     check_torch_load_is_safe,

@@ -119,6 +123,7 @@ from .utils import (
     is_torch_xla_available,
     is_torch_xpu_available,
     logging,
+    replace_return_docstrings,
     strtobool,
 )
 from .utils.generic import GeneralInterface
@@ -5619,6 +5624,453 @@ if PreTrainedModel.push_to_hub.__doc__ is not None:
     )


+class PoolerStartLogits(nn.Module):
+    """
+    Compute SQuAD start logits from sequence hidden states.
+
+    Args:
+        config ([`PretrainedConfig`]):
+            The config used by the model, will be used to grab the `hidden_size` of the model.
+    """
+
+    def __init__(self, config: PretrainedConfig):
+        super().__init__()
+        self.dense = nn.Linear(config.hidden_size, 1)
+        logger.warning_once(
+            "[DEPRECATION WARNING] `PoolerStartLogits` is deprecated and will be removed in v4.53. "
+            "Please use model-specific class, e.g. `XLMPoolerStartLogits`."
+        )
+
+    def forward(
+        self, hidden_states: torch.FloatTensor, p_mask: Optional[torch.FloatTensor] = None
+    ) -> torch.FloatTensor:
+        """
+        Args:
+            hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
+                The final hidden states of the model.
+            p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
+                Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
+                should be masked.
+
+        Returns:
+            `torch.FloatTensor`: The start logits for SQuAD.
+        """
+        x = self.dense(hidden_states).squeeze(-1)
+
+        if p_mask is not None:
+            if get_parameter_dtype(self) == torch.float16:
+                x = x * (1 - p_mask) - 65500 * p_mask
+            else:
+                x = x * (1 - p_mask) - 1e30 * p_mask
+
+        return x
+
+
+class PoolerEndLogits(nn.Module):
+    """
+    Compute SQuAD end logits from sequence hidden states.
+
+    Args:
+        config ([`PretrainedConfig`]):
+            The config used by the model, will be used to grab the `hidden_size` of the model and the `layer_norm_eps`
+            to use.
+    """
+
+    def __init__(self, config: PretrainedConfig):
+        super().__init__()
+        self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
+        self.activation = nn.Tanh()
+        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        self.dense_1 = nn.Linear(config.hidden_size, 1)
+        logger.warning_once(
+            "[DEPRECATION WARNING] `PoolerEndLogits` is deprecated and will be removed in v4.53. "
+            "Please use model-specific class, e.g. `XLMPoolerEndLogits`."
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.FloatTensor,
+        start_states: Optional[torch.FloatTensor] = None,
+        start_positions: Optional[torch.LongTensor] = None,
+        p_mask: Optional[torch.FloatTensor] = None,
+    ) -> torch.FloatTensor:
+        """
+        Args:
+            hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
+                The final hidden states of the model.
+            start_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*):
+                The hidden states of the first tokens for the labeled span.
+            start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                The position of the first token for the labeled span.
+            p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
+                Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
+                should be masked.
+
+        <Tip>
+
+        One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
+        `start_states`.
+
+        </Tip>
+
+        Returns:
+            `torch.FloatTensor`: The end logits for SQuAD.
+        """
+        assert start_states is not None or start_positions is not None, (
+            "One of start_states, start_positions should be not None"
+        )
+        if start_positions is not None:
+            slen, hsz = hidden_states.shape[-2:]
+            start_positions = start_positions[:, None, None].expand(-1, -1, hsz)  # shape (bsz, 1, hsz)
+            start_states = hidden_states.gather(-2, start_positions)  # shape (bsz, 1, hsz)
+            start_states = start_states.expand(-1, slen, -1)  # shape (bsz, slen, hsz)
+
+        x = self.dense_0(torch.cat([hidden_states, start_states], dim=-1))
+        x = self.activation(x)
+        x = self.LayerNorm(x)
+        x = self.dense_1(x).squeeze(-1)
+
+        if p_mask is not None:
+            if get_parameter_dtype(self) == torch.float16:
+                x = x * (1 - p_mask) - 65500 * p_mask
+            else:
+                x = x * (1 - p_mask) - 1e30 * p_mask
+
+        return x
+
+
+class PoolerAnswerClass(nn.Module):
+    """
+    Compute SQuAD 2.0 answer class from classification and start tokens hidden states.
+
+    Args:
+        config ([`PretrainedConfig`]):
+            The config used by the model, will be used to grab the `hidden_size` of the model.
+    """
+
+    def __init__(self, config):
+        super().__init__()
+        self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
+        self.activation = nn.Tanh()
+        self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
+        logger.warning_once(
+            "[DEPRECATION WARNING] `PoolerAnswerClass` is deprecated and will be removed in v4.53. "
+            "Please use model-specific class, e.g. `XLMPoolerAnswerClass`."
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.FloatTensor,
+        start_states: Optional[torch.FloatTensor] = None,
+        start_positions: Optional[torch.LongTensor] = None,
+        cls_index: Optional[torch.LongTensor] = None,
+    ) -> torch.FloatTensor:
+        """
+        Args:
+            hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
+                The final hidden states of the model.
+            start_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*):
+                The hidden states of the first tokens for the labeled span.
+            start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                The position of the first token for the labeled span.
+            cls_index (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                Position of the CLS token for each sentence in the batch. If `None`, takes the last token.
+
+        <Tip>
+
+        One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
+        `start_states`.
+
+        </Tip>
+
+        Returns:
+            `torch.FloatTensor`: The SQuAD 2.0 answer class.
+        """
+        # No dependency on end_feature so that we can obtain one single `cls_logits` for each sample.
+        hsz = hidden_states.shape[-1]
+        assert start_states is not None or start_positions is not None, (
+            "One of start_states, start_positions should be not None"
+        )
+        if start_positions is not None:
+            start_positions = start_positions[:, None, None].expand(-1, -1, hsz)  # shape (bsz, 1, hsz)
+            start_states = hidden_states.gather(-2, start_positions).squeeze(-2)  # shape (bsz, hsz)
+
+        if cls_index is not None:
+            cls_index = cls_index[:, None, None].expand(-1, -1, hsz)  # shape (bsz, 1, hsz)
+            cls_token_state = hidden_states.gather(-2, cls_index).squeeze(-2)  # shape (bsz, hsz)
+        else:
+            cls_token_state = hidden_states[:, -1, :]  # shape (bsz, hsz)
+
+        x = self.dense_0(torch.cat([start_states, cls_token_state], dim=-1))
+        x = self.activation(x)
+        x = self.dense_1(x).squeeze(-1)
+
+        return x
+
+
+@dataclass
+class SquadHeadOutput(ModelOutput):
+    """
+    Base class for outputs of question answering models using a [`~modeling_utils.SQuADHead`].
+
+    Args:
+        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned if both `start_positions` and `end_positions` are provided):
+            Classification loss as the sum of start token, end token (and is_impossible if provided) classification
+            losses.
+        start_top_log_probs (`torch.FloatTensor` of shape `(batch_size, config.start_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
+            Log probabilities for the top config.start_n_top start token possibilities (beam-search).
+        start_top_index (`torch.LongTensor` of shape `(batch_size, config.start_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
+            Indices for the top config.start_n_top start token possibilities (beam-search).
+        end_top_log_probs (`torch.FloatTensor` of shape `(batch_size, config.start_n_top * config.end_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
+            Log probabilities for the top `config.start_n_top * config.end_n_top` end token possibilities
+            (beam-search).
+        end_top_index (`torch.LongTensor` of shape `(batch_size, config.start_n_top * config.end_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
+            Indices for the top `config.start_n_top * config.end_n_top` end token possibilities (beam-search).
+        cls_logits (`torch.FloatTensor` of shape `(batch_size,)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
+            Log probabilities for the `is_impossible` label of the answers.
+
+    """
+
+    loss: Optional[torch.FloatTensor] = None
+    start_top_log_probs: Optional[torch.FloatTensor] = None
+    start_top_index: Optional[torch.LongTensor] = None
+    end_top_log_probs: Optional[torch.FloatTensor] = None
+    end_top_index: Optional[torch.LongTensor] = None
+    cls_logits: Optional[torch.FloatTensor] = None
+
+    def __post_init__(self):
+        logger.warning_once(
+            "[DEPRECATION WARNING] `SquadHeadOutput` is deprecated and will be removed in v4.53. "
+            "Please use model-specific class, e.g. `XLMSquadHeadOutput`."
+        )
+
+
+class SQuADHead(nn.Module):
+    r"""
+    A SQuAD head inspired by XLNet.
+
+    Args:
+        config ([`PretrainedConfig`]):
+            The config used by the model, will be used to grab the `hidden_size` of the model and the `layer_norm_eps`
+            to use.
+    """
+
+    def __init__(self, config):
+        super().__init__()
+        self.start_n_top = config.start_n_top
+        self.end_n_top = config.end_n_top
+
+        self.start_logits = PoolerStartLogits(config)
+        self.end_logits = PoolerEndLogits(config)
+        self.answer_class = PoolerAnswerClass(config)
+
+        logger.warning_once(
+            "[DEPRECATION WARNING] `SQuADHead` is deprecated and will be removed in v4.53. "
+            "Please use model-specific class, e.g. `XLMSQuADHead`."
+        )
+
+    @replace_return_docstrings(output_type=SquadHeadOutput, config_class=PretrainedConfig)
+    def forward(
+        self,
+        hidden_states: torch.FloatTensor,
+        start_positions: Optional[torch.LongTensor] = None,
+        end_positions: Optional[torch.LongTensor] = None,
+        cls_index: Optional[torch.LongTensor] = None,
+        is_impossible: Optional[torch.LongTensor] = None,
+        p_mask: Optional[torch.FloatTensor] = None,
+        return_dict: bool = False,
+    ) -> Union[SquadHeadOutput, tuple[torch.FloatTensor]]:
+        """
+        Args:
+            hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
+                Final hidden states of the model on the sequence tokens.
+            start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                Positions of the first token for the labeled span.
+            end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                Positions of the last token for the labeled span.
+            cls_index (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                Position of the CLS token for each sentence in the batch. If `None`, takes the last token.
+            is_impossible (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+                Whether the question has a possible answer in the paragraph or not.
+            p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
+                Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
+                should be masked.
+            return_dict (`bool`, *optional*, defaults to `False`):
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+
+        Returns:
+        """
+        start_logits = self.start_logits(hidden_states, p_mask=p_mask)
+
+        if start_positions is not None and end_positions is not None:
+            # If we are on multi-GPU, let's remove the dimension added by batch splitting
+            for x in (start_positions, end_positions, cls_index, is_impossible):
+                if x is not None and x.dim() > 1:
+                    x.squeeze_(-1)
+
+            # during training, compute the end logits based on the ground truth of the start position
+            end_logits = self.end_logits(hidden_states, start_positions=start_positions, p_mask=p_mask)
+
+            loss_fct = CrossEntropyLoss()
+            start_loss = loss_fct(start_logits, start_positions)
+            end_loss = loss_fct(end_logits, end_positions)
+            total_loss = (start_loss + end_loss) / 2
+
+            if cls_index is not None and is_impossible is not None:
+                # Predict answerability from the representation of CLS and START
+                cls_logits = self.answer_class(hidden_states, start_positions=start_positions, cls_index=cls_index)
+                loss_fct_cls = nn.BCEWithLogitsLoss()
+                cls_loss = loss_fct_cls(cls_logits, is_impossible)
+
+                # note(zhiliny): by default multiply the loss by 0.5 so that the scale is comparable to start_loss and end_loss
+                total_loss += cls_loss * 0.5
+
+            return SquadHeadOutput(loss=total_loss) if return_dict else (total_loss,)
+
+        else:
+            # during inference, compute the end logits based on beam search
+            bsz, slen, hsz = hidden_states.size()
+            start_log_probs = nn.functional.softmax(start_logits, dim=-1)  # shape (bsz, slen)
+
+            start_top_log_probs, start_top_index = torch.topk(
+                start_log_probs, self.start_n_top, dim=-1
+            )  # shape (bsz, start_n_top)
+            start_top_index_exp = start_top_index.unsqueeze(-1).expand(-1, -1, hsz)  # shape (bsz, start_n_top, hsz)
+            start_states = torch.gather(hidden_states, -2, start_top_index_exp)  # shape (bsz, start_n_top, hsz)
+            start_states = start_states.unsqueeze(1).expand(-1, slen, -1, -1)  # shape (bsz, slen, start_n_top, hsz)
+
+            hidden_states_expanded = hidden_states.unsqueeze(2).expand_as(
+                start_states
+            )  # shape (bsz, slen, start_n_top, hsz)
+            p_mask = p_mask.unsqueeze(-1) if p_mask is not None else None
+            end_logits = self.end_logits(hidden_states_expanded, start_states=start_states, p_mask=p_mask)
+            end_log_probs = nn.functional.softmax(end_logits, dim=1)  # shape (bsz, slen, start_n_top)
+
+            end_top_log_probs, end_top_index = torch.topk(
+                end_log_probs, self.end_n_top, dim=1
+            )  # shape (bsz, end_n_top, start_n_top)
+            end_top_log_probs = end_top_log_probs.view(-1, self.start_n_top * self.end_n_top)
+            end_top_index = end_top_index.view(-1, self.start_n_top * self.end_n_top)
+
+            start_states = torch.einsum("blh,bl->bh", hidden_states, start_log_probs)
+            cls_logits = self.answer_class(hidden_states, start_states=start_states, cls_index=cls_index)
+
+            if not return_dict:
+                return (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits)
+            else:
+                return SquadHeadOutput(
+                    start_top_log_probs=start_top_log_probs,
+                    start_top_index=start_top_index,
+                    end_top_log_probs=end_top_log_probs,
+                    end_top_index=end_top_index,
+                    cls_logits=cls_logits,
+                )
+
+
+class SequenceSummary(nn.Module):
+    r"""
+    Compute a single vector summary of a sequence hidden states.
+
+    Args:
+        config ([`PretrainedConfig`]):
+            The config used by the model. Relevant arguments in the config class of the model are (refer to the actual
+            config class of your model for the default values it uses):
+
+            - **summary_type** (`str`) -- The method to use to make this summary. Accepted values are:
+
+                - `"last"` -- Take the last token hidden state (like XLNet)
+                - `"first"` -- Take the first token hidden state (like Bert)
+                - `"mean"` -- Take the mean of all tokens hidden states
+                - `"cls_index"` -- Supply a Tensor of classification token position (GPT/GPT-2)
+                - `"attn"` -- Not implemented now, use multi-head attention
+
+            - **summary_use_proj** (`bool`) -- Add a projection after the vector extraction.
+            - **summary_proj_to_labels** (`bool`) -- If `True`, the projection outputs to `config.num_labels` classes
+              (otherwise to `config.hidden_size`).
+            - **summary_activation** (`Optional[str]`) -- Set to `"tanh"` to add a tanh activation to the output,
+              another string or `None` will add no activation.
+            - **summary_first_dropout** (`float`) -- Optional dropout probability before the projection and activation.
+            - **summary_last_dropout** (`float`)-- Optional dropout probability after the projection and activation.
+    """
+
+    def __init__(self, config: PretrainedConfig):
+        super().__init__()
+
+        self.summary_type = getattr(config, "summary_type", "last")
+        if self.summary_type == "attn":
+            # We should use a standard multi-head attention module with absolute positional embedding for that.
+            # Cf. https://github.com/zihangdai/xlnet/blob/master/modeling.py#L253-L276
+            # We can probably just use the multi-head attention module of PyTorch >=1.1.0
+            raise NotImplementedError
+
+        self.summary = Identity()
+        if hasattr(config, "summary_use_proj") and config.summary_use_proj:
+            if hasattr(config, "summary_proj_to_labels") and config.summary_proj_to_labels and config.num_labels > 0:
+                num_classes = config.num_labels
+            else:
+                num_classes = config.hidden_size
+            self.summary = nn.Linear(config.hidden_size, num_classes)
+
+        activation_string = getattr(config, "summary_activation", None)
+        self.activation: Callable = get_activation(activation_string) if activation_string else Identity()
+
+        self.first_dropout = Identity()
+        if hasattr(config, "summary_first_dropout") and config.summary_first_dropout > 0:
+            self.first_dropout = nn.Dropout(config.summary_first_dropout)
+
+        self.last_dropout = Identity()
+        if hasattr(config, "summary_last_dropout") and config.summary_last_dropout > 0:
+            self.last_dropout = nn.Dropout(config.summary_last_dropout)
+
+        logger.warning_once(
+            "[DEPRECATION WARNING] `SequenceSummary` is deprecated and will be removed in v4.53. "
+            "Please use model-specific class, e.g. `XLMSequenceSummary`."
+        )
+
+    def forward(
+        self, hidden_states: torch.FloatTensor, cls_index: Optional[torch.LongTensor] = None
+    ) -> torch.FloatTensor:
+        """
+        Compute a single vector summary of a sequence hidden states.
+
+        Args:
+            hidden_states (`torch.FloatTensor` of shape `[batch_size, seq_len, hidden_size]`):
+                The hidden states of the last layer.
+            cls_index (`torch.LongTensor` of shape `[batch_size]` or `[batch_size, ...]` where ... are optional leading dimensions of `hidden_states`, *optional*):
+                Used if `summary_type == "cls_index"` and takes the last token of the sequence as classification token.
+
+        Returns:
+            `torch.FloatTensor`: The summary of the sequence hidden states.
+        """
+        if self.summary_type == "last":
+            output = hidden_states[:, -1]
+        elif self.summary_type == "first":
+            output = hidden_states[:, 0]
+        elif self.summary_type == "mean":
+            output = hidden_states.mean(dim=1)
+        elif self.summary_type == "cls_index":
+            if cls_index is None:
+                cls_index = torch.full_like(
+                    hidden_states[..., :1, :],
+                    hidden_states.shape[-2] - 1,
+                    dtype=torch.long,
+                )
+            else:
+                cls_index = cls_index.unsqueeze(-1).unsqueeze(-1)
+                cls_index = cls_index.expand((-1,) * (cls_index.dim() - 1) + (hidden_states.size(-1),))
+            # shape of cls_index: (bsz, XX, 1, hidden_size) where XX are optional leading dim of hidden_states
+            output = hidden_states.gather(-2, cls_index).squeeze(-2)  # shape (bsz, XX, hidden_size)
+        elif self.summary_type == "attn":
+            raise NotImplementedError
+
+        output = self.first_dropout(output)
+        output = self.summary(output)
+        output = self.activation(output)
+        output = self.last_dropout(output)
+
+        return output
+
+
 def unwrap_model(model: nn.Module, recursive: bool = False) -> nn.Module:
     """
     Recursively unwraps a model from potential containers (as used in distributed training).
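The block above re-adds several long-deprecated heads verbatim. For orientation, a minimal sketch of how the simplest of them, `SequenceSummary`, is driven entirely by config attributes; this assumes a transformers build in which the class is importable from `transformers.modeling_utils`, as in the added code:

```python
import torch
from transformers import PretrainedConfig
from transformers.modeling_utils import SequenceSummary

# All behaviour comes from config attributes read via getattr/hasattr.
config = PretrainedConfig(
    hidden_size=8,
    summary_type="mean",          # average all token hidden states
    summary_use_proj=True,        # adds a Linear(hidden_size, hidden_size)
    summary_proj_to_labels=False,
    summary_activation="tanh",
)

summary = SequenceSummary(config)              # emits the deprecation warning
hidden_states = torch.randn(2, 5, config.hidden_size)
pooled = summary(hidden_states)                # mean over tokens -> projection -> tanh
print(pooled.shape)                            # torch.Size([2, 8])
```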
@@ -206,7 +206,7 @@ def convert_audio_spectrogram_transformer_checkpoint(model_name, pytorch_dump_fo

     if "speech-commands" in model_name:
         # TODO: Convert dataset to Parquet
-        dataset = load_dataset("google/speech_commands", "v0.02", split="validation")
+        dataset = load_dataset("google/speech_commands", "v0.02", split="validation", trust_remote_code=True)
         waveform = dataset[0]["audio"]["array"]
     else:
         filepath = hf_hub_download(
@@ -245,10 +245,6 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, tuple[Optional[str], Optional[str]]](
         ("gpt_neox_japanese", ("GPTNeoXJapaneseTokenizer", None)),
         ("gptj", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
         ("gptsan-japanese", ("GPTSanJapaneseTokenizer", None)),
-        ("granite", ("GPT2Tokenizer", None)),
-        ("granitemoe", ("GPT2Tokenizer", None)),
-        ("granitemoehybrid", ("GPT2Tokenizer", None)),
-        ("granitemoeshared", ("GPT2Tokenizer", None)),
         ("grounding-dino", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
         ("groupvit", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
         ("helium", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
@@ -266,7 +266,7 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
     # Check outputs on an image
     if is_semantic:
         image_processor = BeitImageProcessor(size=config.image_size, do_center_crop=False)
-        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
         image = Image.open(ds[0]["file"])
     else:
         image_processor = BeitImageProcessor(
@@ -15,14 +15,7 @@

 """English Normalizer class for CLVP."""

-import sys
-
-
-if sys.version_info >= (3, 11):
-    # Atomic grouping support was only added to the core RE in Python 3.11
-    import re
-else:
-    import regex as re
+import re


 class EnglishNormalizer:

@@ -206,12 +199,12 @@ class EnglishNormalizer:
         This method is used to normalize numbers within a text such as converting the numbers to words, removing
         commas, etc.
         """
-        text = re.sub(r"([0-9][0-9,]+[0-9])", self._remove_commas, text)
-        text = re.sub(r"£([0-9,]*[0-9])", r"\1 pounds", text)
-        text = re.sub(r"\$([0-9.,]*[0-9])", self._expand_dollars, text)
-        text = re.sub(r"([0-9]++\.[0-9]+)", self._expand_decimal_point, text)
-        text = re.sub(r"[0-9]++(st|nd|rd|th)", self._expand_ordinal, text)
-        text = re.sub(r"[0-9]+", self._expand_number, text)
+        text = re.sub(re.compile(r"([0-9][0-9\,]+[0-9])"), self._remove_commas, text)
+        text = re.sub(re.compile(r"£([0-9\,]*[0-9]+)"), r"\1 pounds", text)
+        text = re.sub(re.compile(r"\$([0-9\.\,]*[0-9]+)"), self._expand_dollars, text)
+        text = re.sub(re.compile(r"([0-9]+\.[0-9]+)"), self._expand_decimal_point, text)
+        text = re.sub(re.compile(r"[0-9]+(st|nd|rd|th)"), self._expand_ordinal, text)
+        text = re.sub(re.compile(r"[0-9]+"), self._expand_number, text)
         return text

     def expand_abbreviations(self, text: str) -> str:
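For context: the `-` side relies on possessive quantifiers (`[0-9]++`), which the standard `re` module only supports from Python 3.11 (hence its conditional fallback to the third-party `regex` package), while the `+` side uses ordinary greedy quantifiers wrapped in `re.compile`, which work on any supported Python. A standalone sketch of the comma-removal step with a stand-in for the class's `_remove_commas` helper, which is not shown in this hunk:

```python
import re

def remove_commas(match: re.Match) -> str:
    # Stand-in for EnglishNormalizer._remove_commas, for illustration only.
    return match.group(1).replace(",", "")

text = "It sold 1,234,567 copies."
print(re.sub(re.compile(r"([0-9][0-9\,]+[0-9])"), remove_commas, text))
# -> It sold 1234567 copies.
```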
@@ -226,7 +226,7 @@ def convert_wav2vec2_checkpoint(

     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-lv60")

-    ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+    ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
     input_audio = [x["array"] for x in ds[:4]["audio"]]

     inputs = processor(input_audio, return_tensors="pt", padding=True)
@@ -1223,7 +1223,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
        >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
        >>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

-        >>> dataset = load_dataset("nielsr/funsd", split="train")
+        >>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
        >>> example = dataset[0]
        >>> question = "what's his name?"
        >>> words = example["words"]

@@ -1601,7 +1601,7 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
        >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
        >>> model = TFLayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

-        >>> dataset = load_dataset("nielsr/funsd", split="train")
+        >>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
        >>> example = dataset[0]
        >>> question = "what's his name?"
        >>> words = example["words"]
@@ -763,8 +763,9 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
         >>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")
 
 
-        >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
-        >>> image = dataset["test"][0]["image"]
+        >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
+        >>> image_path = dataset["test"][0]["file"]
+        >>> image = Image.open(image_path).convert("RGB")
 
         >>> encoding = processor(image, return_tensors="pt")
 

@@ -952,7 +953,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
 
         >>> set_seed(0)
 
-        >>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True)
+        >>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True, trust_remote_code=True)
         >>> data = next(iter(dataset))
         >>> image = data["image"].convert("RGB")
 

@@ -1154,7 +1155,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
 
         >>> set_seed(0)
 
-        >>> datasets = load_dataset("nielsr/funsd", split="test")
+        >>> datasets = load_dataset("nielsr/funsd", split="test", trust_remote_code=True)
         >>> labels = datasets.features["ner_tags"].feature.names
         >>> id2label = {v: k for v, k in enumerate(labels)}
 

@@ -1311,8 +1312,9 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
         >>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
 
-        >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
-        >>> image = dataset["test"][0]["image"]
+        >>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
+        >>> image_path = dataset["test"][0]["file"]
+        >>> image = Image.open(image_path).convert("RGB")
         >>> question = "When is coffee break?"
         >>> encoding = processor(image, question, return_tensors="pt")
 
@@ -746,7 +746,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -961,7 +961,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -1062,7 +1062,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = AutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> question = "what's his name?"

@@ -1182,7 +1182,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -1296,7 +1296,7 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -1439,7 +1439,7 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -1566,7 +1566,7 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -1703,7 +1703,7 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
         >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
         >>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> question = "what's his name?"
@@ -653,7 +653,7 @@ class LiltModel(LiltPreTrainedModel):
         >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
         >>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> words = example["tokens"]
         >>> boxes = example["bboxes"]

@@ -793,7 +793,7 @@ class LiltForSequenceClassification(LiltPreTrainedModel):
         >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
         >>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> words = example["tokens"]
         >>> boxes = example["bboxes"]

@@ -908,7 +908,7 @@ class LiltForTokenClassification(LiltPreTrainedModel):
         >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
         >>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> words = example["tokens"]
         >>> boxes = example["bboxes"]

@@ -1025,7 +1025,7 @@ class LiltForQuestionAnswering(LiltPreTrainedModel):
         >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
         >>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
 
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> words = example["tokens"]
         >>> boxes = example["bboxes"]
@@ -2228,7 +2228,7 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel, GenerationMixin):
         >>> from datasets import load_dataset
 
         >>> dataset = load_dataset(
-        ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
+        ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
         ... )  # doctest: +IGNORE_RESULT
         >>> dataset = dataset.sort("id")
         >>> sampling_rate = dataset.features["audio"].sampling_rate

@@ -2909,7 +2909,7 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
         >>> import torch
 
         >>> dataset = load_dataset(
-        ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
+        ...     "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
         ... )  # doctest: +IGNORE_RESULT
         >>> dataset = dataset.sort("id")
         >>> sampling_rate = dataset.features["audio"].sampling_rate
@@ -1604,7 +1604,7 @@ class UdopModel(UdopPreTrainedModel):
 
         >>> # load an example image, along with the words and coordinates
         >>> # which were extracted using an OCR engine
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -1813,7 +1813,7 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin):
 
         >>> # load an example image, along with the words and coordinates
         >>> # which were extracted using an OCR engine
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]

@@ -2025,7 +2025,7 @@ class UdopEncoderModel(UdopPreTrainedModel):
 
         >>> # load an example image, along with the words and coordinates
         >>> # which were extracted using an OCR engine
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
+        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
         >>> example = dataset[0]
         >>> image = example["image"]
         >>> words = example["tokens"]
@@ -590,7 +590,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
         >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
 
         >>> # load first sample of English common_voice
-        >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
+        >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
         >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         >>> dataset_iter = iter(dataset)
         >>> sample = next(dataset_iter)

@@ -546,7 +546,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
         >>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")
 
         >>> # load first sample of English common_voice
-        >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
+        >>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
         >>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         >>> dataset_iter = iter(dataset)
         >>> sample = next(dataset_iter)
@@ -1670,7 +1670,7 @@ FLAX_WHISPER_AUDIO_CLASSIFICATION_DOCSTRING = r"""
    >>> model = FlaxWhisperForAudioClassification.from_pretrained(
    ...     "sanchit-gandhi/whisper-medium-fleurs-lang-id", from_pt=True
    ... )
-    >>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True)
+    >>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True, trust_remote_code=True)
 
    >>> sample = next(iter(ds))
 
@@ -526,15 +526,12 @@ class Trainer:
            if is_liger_kernel_available():
                from liger_kernel.transformers import _apply_liger_kernel_to_instance
 
-                # Prepare kernel config - use provided config or default (empty dict for default behavior)
-                kernel_config = self.args.liger_kernel_config if self.args.liger_kernel_config is not None else {}
-
                if isinstance(model, PreTrainedModel):
-                    # Patch the model with liger kernels. Use the the specified or default kernel configurations.
-                    _apply_liger_kernel_to_instance(model=model, **kernel_config)
+                    # Patch the model with liger kernels. Use the default kernel configurations.
+                    _apply_liger_kernel_to_instance(model=model)
                elif hasattr(model, "get_base_model") and isinstance(model.get_base_model(), PreTrainedModel):
-                    # Patch the base model with liger kernels where model is a PeftModel. Use the specified or default kernel configurations.
-                    _apply_liger_kernel_to_instance(model=model.get_base_model(), **kernel_config)
+                    # Patch the base model with liger kernels where model is a PeftModel. Use the default kernel configurations.
+                    _apply_liger_kernel_to_instance(model=model.get_base_model())
                else:
                    logger.warning(
                        "The model is not an instance of PreTrainedModel. No liger kernels will be applied."
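The branch above only decides *which* object gets patched; the patching itself is delegated to `liger_kernel`. A minimal sketch of the same call outside the Trainer (assumes `liger-kernel` is installed and the checkpoint is one of the supported architectures; the model name here is just an example):

```python
from liger_kernel.transformers import _apply_liger_kernel_to_instance
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")  # any supported causal LM checkpoint

# Patches supported modules (RMSNorm, RoPE, SwiGLU, fused cross-entropy, ...) in place
# with the default kernel configuration, mirroring what Trainer does when use_liger_kernel=True.
_apply_liger_kernel_to_instance(model=model)
```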
@@ -793,11 +793,6 @@ class TrainingArguments:
            It can effectively increase multi-GPU training throughput by ~20% and reduces memory usage by ~60%, works out of the box with
            flash attention, PyTorch FSDP, and Microsoft DeepSpeed. Currently, it supports llama, mistral, mixtral and gemma models.
 
-        liger_kernel_config (`Optional[dict]`, *optional*):
-            Configuration to be used for Liger Kernel. When use_liger_kernel=True, this dict is passed as keyword arguments to the
-            `_apply_liger_kernel_to_instance` function, which specifies which kernels to apply. Available options vary by model but typically
-            include: 'rope', 'swiglu', 'cross_entropy', 'fused_linear_cross_entropy', 'rms_norm', etc. If `None`, use the default kernel configurations.
-
        average_tokens_across_devices (`bool`, *optional*, defaults to `False`):
            Whether or not to average tokens across devices. If enabled, will use all_reduce to synchronize
            num_tokens_in_batch for precise loss calculation. Reference:

@@ -1530,19 +1525,6 @@ class TrainingArguments:
        metadata={"help": "Whether or not to enable the Liger Kernel for model training."},
    )
 
-    liger_kernel_config: Optional[dict[str, bool]] = field(
-        default=None,
-        metadata={
-            "help": (
-                "Configuration to be used for Liger Kernel. When use_liger_kernel=True, "
-                "this dict is passed as keyword arguments to the `_apply_liger_kernel_to_instance` function, "
-                "which specifies which kernels to apply. Available options vary by model "
-                "but typically include: 'rope', 'swiglu', 'cross_entropy', 'fused_linear_cross_entropy', "
-                "'rms_norm', etc. If None, use the default kernel configurations."
-            )
-        },
-    )
-
    eval_use_gather_object: Optional[bool] = field(
        default=False,
        metadata={
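The net effect of these two hunks is that only the boolean switch survives on this side of the compare; per-kernel selection via `liger_kernel_config` exists only on the other side. A sketch of how the remaining option is used (the output path is a placeholder, and the commented-out line shows the field that is being removed):

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",       # placeholder path
    use_liger_kernel=True,  # kept on both sides: turn Liger kernel patching on
    # liger_kernel_config={"rope": True, "rms_norm": True},  # only valid where the field above still exists
)
```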
@@ -423,7 +423,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
    >>> import torch
    >>> from datasets import load_dataset
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -449,7 +449,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
    >>> from datasets import load_dataset
    >>> import torch
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -484,7 +484,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
    >>> from datasets import load_dataset
    >>> import torch
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -520,7 +520,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
    >>> from datasets import load_dataset
    >>> import torch
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -549,7 +549,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
    >>> from datasets import load_dataset
    >>> import torch
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -584,7 +584,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
    >>> import torch
    >>> from datasets import load_dataset
 
-    >>> dataset = load_dataset("huggingface/cats-image")
+    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]
 
    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")

@@ -609,7 +609,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
    >>> import torch
    >>> from datasets import load_dataset
 
-    >>> dataset = load_dataset("huggingface/cats-image")
+    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]
 
    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")

@@ -1194,7 +1194,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -1219,7 +1219,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
    >>> from datasets import load_dataset
    >>> import tensorflow as tf
 
-    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate
 

@@ -1254,7 +1254,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> from datasets import load_dataset
 
-    >>> dataset = load_dataset("huggingface/cats-image")
+    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]
 
    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")

@@ -1277,7 +1277,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
    >>> import tensorflow as tf
    >>> from datasets import load_dataset
 
-    >>> dataset = load_dataset("huggingface/cats-image"))
+    >>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
    >>> image = dataset["test"]["image"][0]
 
    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
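These `*_SAMPLE` strings are docstring templates rather than literal examples; placeholders such as `{model_class}` and `{checkpoint}` are filled in when the docstring is attached to a concrete model class. A rough sketch of that substitution step (plain `str.format`, standing in for the library's actual docstring-decorator machinery):

```python
PT_VISION_BASE_MODEL_SAMPLE = """
    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")
"""

# Fill the template the way a docstring decorator would before attaching it to a class.
print(PT_VISION_BASE_MODEL_SAMPLE.format(model_class="ViTModel", checkpoint="google/vit-base-patch16-224-in21k"))
```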
@@ -269,6 +269,7 @@ def make_task_cmds():
        "img_clas": f"""
        {scripts_dir}/image-classification/run_image_classification.py
        --dataset_name hf-internal-testing/cats_vs_dogs_sample
+        --trust_remote_code
        --remove_unused_columns False
        --max_steps 10
        --image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
@@ -27,6 +27,8 @@ if is_torch_available():
    import torch
 
 if is_vision_available():
+    from PIL import Image
+
    from transformers import BeitImageProcessor
 
 if is_torchvision_available():
@@ -96,14 +98,23 @@ class BeitImageProcessingTester:
 
 
 def prepare_semantic_single_inputs():
-    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
-    example = ds[0]
-    return example["image"], example["map"]
+    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
+
+    image = Image.open(dataset[0]["file"])
+    map = Image.open(dataset[1]["file"])
+
+    return image, map
 
 
 def prepare_semantic_batch_inputs():
-    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
-    return list(ds["image"][:2]), list(ds["map"][:2])
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
+
+    image1 = Image.open(ds[0]["file"])
+    map1 = Image.open(ds[1]["file"])
+    image2 = Image.open(ds[2]["file"])
+    map2 = Image.open(ds[3]["file"])
+
+    return [image1, image2], [map1, map2]
 
 
 @require_torch
@@ -504,7 +504,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
 
        image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
 
-        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
        image = Image.open(ds[0]["file"])
        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
 

@@ -547,7 +547,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
 
        image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
 
-        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
        image = Image.open(ds[0]["file"])
        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
 
@@ -669,7 +669,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
        return [x["array"] for x in speech_samples]
 
    def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
 
        return ds[:num_samples]
 
@@ -29,6 +29,8 @@ if is_torch_available():
    import torch
 
 if is_vision_available():
+    from PIL import Image
+
    from transformers import DPTImageProcessor
 
 if is_torchvision_available():
@@ -92,15 +94,24 @@ class DPTImageProcessingTester:
 
 # Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_single_inputs
 def prepare_semantic_single_inputs():
-    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
-    example = ds[0]
-    return example["image"], example["map"]
+    dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
+
+    image = Image.open(dataset[0]["file"])
+    map = Image.open(dataset[1]["file"])
+
+    return image, map
 
 
 # Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_batch_inputs
 def prepare_semantic_batch_inputs():
-    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
-    return list(ds["image"][:2]), list(ds["map"][:2])
+    ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
+
+    image1 = Image.open(ds[0]["file"])
+    map1 = Image.open(ds[1]["file"])
+    image2 = Image.open(ds[2]["file"])
+    map2 = Image.open(ds[3]["file"])
+
+    return [image1, image2], [map1, map2]
 
 
 @require_torch
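The test-fixture hunks in this section all flip between the same two access patterns: one side indexes decoded image features directly, the other keeps file paths in the fixture and opens them with PIL. A hedged, side-by-side sketch (dataset name as used in the tests; which branch actually works depends on how the fixture repository is laid out and on the installed `datasets` version):

```python
from datasets import load_dataset
from PIL import Image

# Layout A: the fixture exposes decoded "image" / "map" features.
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image, seg_map = ds[0]["image"], ds[0]["map"]

# Layout B: the script-backed fixture only stores file paths, so the files are opened manually.
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image, seg_map = Image.open(ds[0]["file"]), Image.open(ds[1]["file"])
```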
@ -767,7 +767,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
|
|||||||
def _load_superb(self, task, num_samples):
|
def _load_superb(self, task, num_samples):
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
||||||
|
|
||||||
return ds[:num_samples]
|
return ds[:num_samples]
|
||||||
|
|
||||||
|
|||||||
@ -111,13 +111,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
|
|||||||
def test_layoutlmv2_integration_test(self):
|
def test_layoutlmv2_integration_test(self):
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
||||||
|
|
||||||
for image_processing_class in self.image_processor_list:
|
for image_processing_class in self.image_processor_list:
|
||||||
# with apply_OCR = True
|
# with apply_OCR = True
|
||||||
image_processing = image_processing_class()
|
image_processing = image_processing_class()
|
||||||
|
|
||||||
image = ds[0]["image"]
|
image = Image.open(ds[0]["file"]).convert("RGB")
|
||||||
|
|
||||||
encoding = image_processing(image, return_tensors="pt")
|
encoding = image_processing(image, return_tensors="pt")
|
||||||
|
|
||||||
|
|||||||
@ -156,7 +156,7 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
|||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
# set up
|
# set up
|
||||||
datasets = load_dataset("nielsr/funsd")
|
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
|
||||||
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
|
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
|
||||||
|
|
||||||
def preprocess_data(examples):
|
def preprocess_data(examples):
|
||||||
@ -192,8 +192,12 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
|
|||||||
# we verify our implementation on 2 document images from the DocVQA dataset
|
# we verify our implementation on 2 document images from the DocVQA dataset
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
||||||
return ds[0]["image"], ds[1]["image"]
|
|
||||||
|
image_1 = Image.open(ds[0]["file"]).convert("RGB")
|
||||||
|
image_2 = Image.open(ds[1]["file"]).convert("RGB")
|
||||||
|
|
||||||
|
return image_1, image_2
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def get_tokenizers(self):
|
def get_tokenizers(self):
|
||||||
|
|||||||
@ -22,6 +22,8 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im
|
|||||||
|
|
||||||
|
|
||||||
if is_pytesseract_available():
|
if is_pytesseract_available():
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from transformers import LayoutLMv3ImageProcessor
|
from transformers import LayoutLMv3ImageProcessor
|
||||||
|
|
||||||
if is_torchvision_available():
|
if is_torchvision_available():
|
||||||
@ -104,13 +106,13 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
|
|||||||
def test_LayoutLMv3_integration_test(self):
|
def test_LayoutLMv3_integration_test(self):
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
||||||
|
|
||||||
# with apply_OCR = True
|
# with apply_OCR = True
|
||||||
for image_processing_class in self.image_processor_list:
|
for image_processing_class in self.image_processor_list:
|
||||||
image_processor = image_processing_class()
|
image_processor = image_processing_class()
|
||||||
|
|
||||||
image = ds[0]["image"].convert("RGB")
|
image = Image.open(ds[0]["file"]).convert("RGB")
|
||||||
|
|
||||||
encoding = image_processor(image, return_tensors="pt")
|
encoding = image_processor(image, return_tensors="pt")
|
||||||
|
|
||||||
|
|||||||
@ -28,6 +28,8 @@ from ...test_processing_common import ProcessorTesterMixin
|
|||||||
|
|
||||||
|
|
||||||
if is_pytesseract_available():
|
if is_pytesseract_available():
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from transformers import LayoutLMv3ImageProcessor
|
from transformers import LayoutLMv3ImageProcessor
|
||||||
|
|
||||||
|
|
||||||
@ -170,8 +172,12 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
|
|||||||
# we verify our implementation on 2 document images from the DocVQA dataset
|
# we verify our implementation on 2 document images from the DocVQA dataset
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
||||||
return ds[0]["image"], ds[1]["image"]
|
|
||||||
|
image_1 = Image.open(ds[0]["file"]).convert("RGB")
|
||||||
|
image_2 = Image.open(ds[1]["file"]).convert("RGB")
|
||||||
|
|
||||||
|
return image_1, image_2
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def get_tokenizers(self):
|
def get_tokenizers(self):
|
||||||
|
|||||||
@ -162,7 +162,7 @@ class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
|||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
# set up
|
# set up
|
||||||
datasets = load_dataset("nielsr/funsd")
|
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
|
||||||
processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False)
|
processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False)
|
||||||
|
|
||||||
def preprocess_data(examples):
|
def preprocess_data(examples):
|
||||||
@ -200,8 +200,12 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
|
|||||||
# we verify our implementation on 2 document images from the DocVQA dataset
|
# we verify our implementation on 2 document images from the DocVQA dataset
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
||||||
return ds[0]["image"], ds[1]["image"]
|
|
||||||
|
image_1 = Image.open(ds[0]["file"]).convert("RGB")
|
||||||
|
image_2 = Image.open(ds[1]["file"]).convert("RGB")
|
||||||
|
|
||||||
|
return image_1, image_2
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def get_tokenizers(self):
|
def get_tokenizers(self):
|
||||||
|
|||||||
@ -27,6 +27,8 @@ if is_torch_available():
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
if is_vision_available():
|
if is_vision_available():
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from transformers import MobileViTImageProcessor
|
from transformers import MobileViTImageProcessor
|
||||||
|
|
||||||
|
|
||||||
@ -84,14 +86,23 @@ class MobileViTImageProcessingTester:
|
|||||||
|
|
||||||
|
|
||||||
def prepare_semantic_single_inputs():
|
def prepare_semantic_single_inputs():
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
|
||||||
example = ds[0]
|
|
||||||
return example["image"], example["map"]
|
image = Image.open(dataset[0]["file"])
|
||||||
|
map = Image.open(dataset[1]["file"])
|
||||||
|
|
||||||
|
return image, map
|
||||||
|
|
||||||
|
|
||||||
def prepare_semantic_batch_inputs():
|
def prepare_semantic_batch_inputs():
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
|
||||||
return list(ds["image"][:2]), list(ds["map"][:2])
|
|
||||||
|
image1 = Image.open(dataset[0]["file"])
|
||||||
|
map1 = Image.open(dataset[1]["file"])
|
||||||
|
image2 = Image.open(dataset[2]["file"])
|
||||||
|
map2 = Image.open(dataset[3]["file"])
|
||||||
|
|
||||||
|
return [image1, image2], [map1, map2]
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
|
|||||||
@ -86,12 +86,8 @@ class NougatImageProcessingTester:
|
|||||||
return self.num_channels, self.size["height"], self.size["width"]
|
return self.num_channels, self.size["height"], self.size["width"]
|
||||||
|
|
||||||
def prepare_dummy_image(self):
|
def prepare_dummy_image(self):
|
||||||
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
|
|
||||||
filepath = hf_hub_download(
|
filepath = hf_hub_download(
|
||||||
repo_id="hf-internal-testing/fixtures_docvqa",
|
repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
|
||||||
filename="nougat_pdf.png",
|
|
||||||
repo_type="dataset",
|
|
||||||
revision=revision,
|
|
||||||
)
|
)
|
||||||
image = Image.open(filepath).convert("RGB")
|
image = Image.open(filepath).convert("RGB")
|
||||||
return image
|
return image
|
||||||
@ -183,12 +179,8 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
|
|||||||
self.assertEqual((3, 100, 200), aligned_image.shape)
|
self.assertEqual((3, 100, 200), aligned_image.shape)
|
||||||
|
|
||||||
def prepare_dummy_np_image(self):
|
def prepare_dummy_np_image(self):
|
||||||
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
|
|
||||||
filepath = hf_hub_download(
|
filepath = hf_hub_download(
|
||||||
repo_id="hf-internal-testing/fixtures_docvqa",
|
repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
|
||||||
filename="nougat_pdf.png",
|
|
||||||
repo_type="dataset",
|
|
||||||
revision=revision,
|
|
||||||
)
|
)
|
||||||
image = Image.open(filepath).convert("RGB")
|
image = Image.open(filepath).convert("RGB")
|
||||||
return np.array(image)
|
return np.array(image)
|
||||||
|
|||||||
@ -842,8 +842,11 @@ def prepare_img():
|
|||||||
|
|
||||||
# Helper functions for optical flow integration test
|
# Helper functions for optical flow integration test
|
||||||
def prepare_optical_flow_images():
|
def prepare_optical_flow_images():
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
|
dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test", trust_remote_code=True)
|
||||||
return list(ds["image"][:2])
|
image1 = Image.open(dataset[0]["file"]).convert("RGB")
|
||||||
|
image2 = Image.open(dataset[0]["file"]).convert("RGB")
|
||||||
|
|
||||||
|
return image1, image2
|
||||||
|
|
||||||
|
|
||||||
def normalize(img):
|
def normalize(img):
|
||||||
|
|||||||
@ -27,6 +27,8 @@ if is_torch_available():
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
if is_vision_available():
|
if is_vision_available():
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from transformers import SegformerImageProcessor
|
from transformers import SegformerImageProcessor
|
||||||
|
|
||||||
|
|
||||||
@ -84,14 +86,23 @@ class SegformerImageProcessingTester:
|
|||||||
|
|
||||||
|
|
||||||
def prepare_semantic_single_inputs():
|
def prepare_semantic_single_inputs():
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
|
||||||
example = ds[0]
|
|
||||||
return example["image"], example["map"]
|
image = Image.open(dataset[0]["file"])
|
||||||
|
map = Image.open(dataset[1]["file"])
|
||||||
|
|
||||||
|
return image, map
|
||||||
|
|
||||||
|
|
||||||
def prepare_semantic_batch_inputs():
|
def prepare_semantic_batch_inputs():
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
|
||||||
return list(ds["image"][:2]), list(ds["map"][:2])
|
|
||||||
|
image1 = Image.open(dataset[0]["file"])
|
||||||
|
map1 = Image.open(dataset[1]["file"])
|
||||||
|
image2 = Image.open(dataset[2]["file"])
|
||||||
|
map2 = Image.open(dataset[3]["file"])
|
||||||
|
|
||||||
|
return [image1, image2], [map1, map2]
|
||||||
|
|
||||||
|
|
||||||
@require_torch
|
@require_torch
|
||||||
|
|||||||
@ -184,7 +184,7 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
|||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
# set up
|
# set up
|
||||||
datasets = load_dataset("nielsr/funsd")
|
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
|
||||||
processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
|
processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
|
||||||
|
|
||||||
def preprocess_data(examples):
|
def preprocess_data(examples):
|
||||||
@ -222,8 +222,12 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
|
|||||||
# we verify our implementation on 2 document images from the DocVQA dataset
|
# we verify our implementation on 2 document images from the DocVQA dataset
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
|
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
|
||||||
return ds[0]["image"], ds[1]["image"]
|
|
||||||
|
image_1 = Image.open(ds[0]["file"]).convert("RGB")
|
||||||
|
image_2 = Image.open(ds[1]["file"]).convert("RGB")
|
||||||
|
|
||||||
|
return image_1, image_2
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def get_tokenizers(self):
|
def get_tokenizers(self):
|
||||||
|
|||||||
@ -566,7 +566,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
|
|||||||
return [x["array"] for x in speech_samples]
|
return [x["array"] for x in speech_samples]
|
||||||
|
|
||||||
def _load_superb(self, task, num_samples):
|
def _load_superb(self, task, num_samples):
|
||||||
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
||||||
|
|
||||||
return ds[:num_samples]
|
return ds[:num_samples]
|
||||||
|
|
||||||
|
|||||||
@ -820,7 +820,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
|
|||||||
return [x["array"] for x in speech_samples]
|
return [x["array"] for x in speech_samples]
|
||||||
|
|
||||||
def _load_superb(self, task, num_samples):
|
def _load_superb(self, task, num_samples):
|
||||||
ds = load_dataset("anton-l/superb_dummy", task, split="test")
|
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
|
||||||
|
|
||||||
return ds[:num_samples]
|
return ds[:num_samples]
|
||||||
|
|
||||||
|
|||||||
@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):
|
|||||||
|
|
||||||
processor = self.default_processor
|
processor = self.default_processor
|
||||||
|
|
||||||
dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
|
dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)
|
||||||
image1 = dataset[0]["image"]
|
image1 = Image.open(dataset[0]["file"]).convert("RGB")
|
||||||
image2 = dataset[1]["image"]
|
image2 = Image.open(dataset[1]["file"]).convert("RGB")
|
||||||
|
|
||||||
text = (
|
text = (
|
||||||
"The left image contains twice the number of dogs as the right image, and at least two dogs in total are"
|
"The left image contains twice the number of dogs as the right image, and at least two dogs in total are"
|
||||||
|
|||||||
@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
|
|||||||
def test_inference_handwritten(self):
|
def test_inference_handwritten(self):
|
||||||
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)
|
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)
|
||||||
|
|
||||||
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
|
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
|
||||||
image = dataset[0]["image"]
|
image = Image.open(dataset[0]["file"]).convert("RGB")
|
||||||
|
|
||||||
processor = self.default_processor
|
processor = self.default_processor
|
||||||
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
|
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
|
||||||
@ -1174,8 +1174,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
|
|||||||
def test_inference_printed(self):
|
def test_inference_printed(self):
|
||||||
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)
|
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)
|
||||||
|
|
||||||
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
|
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
|
||||||
image = dataset[0]["image"]
|
image = Image.open(dataset[1]["file"]).convert("RGB")
|
||||||
|
|
||||||
processor = self.default_processor
|
processor = self.default_processor
|
||||||
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
|
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
|
||||||
|
|||||||
@@ -97,7 +97,9 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
     try:
         _ = in_queue.get(timeout=timeout)

-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1468,7 +1470,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)

         return ds[:num_samples]

@@ -1834,7 +1836,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm(self):
-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1858,7 +1862,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
     @require_pyctcdecode
     @require_torchaudio
     def test_wav2vec2_with_lm_pool(self):
-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
+        )
         sample = next(iter(ds))

         resampled_audio = torchaudio.functional.resample(
@@ -1957,7 +1963,9 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}

         def run_model(lang):
-            ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
+            ds = load_dataset(
+                "mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
+            )
             sample = next(iter(ds))

             wav2vec2_lang = LANG_MAP[lang]
@@ -463,7 +463,9 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
     def test_word_time_stamp_integration(self):
         import torch

-        ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
+        ds = load_dataset(
+            "mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
         ds_iter = iter(ds)
         sample = next(ds_iter)
@@ -473,7 +473,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
         return [x["array"] for x in speech_samples]

     def _load_superb(self, task, num_samples):
-        ds = load_dataset("anton-l/superb_dummy", task, split="test")
+        ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)

         return ds[:num_samples]

@@ -1645,7 +1645,9 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
         model.to(torch_device)

-        ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
+        ds = load_dataset(
+            "facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
+        )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))

         input_speech = next(iter(ds))["audio"]["array"]
@@ -1712,10 +1714,11 @@ class WhisperModelIntegrationTests(unittest.TestCase):

         token = os.getenv("HF_HUB_READ_TOKEN", True)
         ds = load_dataset(
-            "hf-internal-testing/fixtures_common_voice",
+            "mozilla-foundation/common_voice_6_1",
             "ja",
             split="test",
             streaming=True,
+            trust_remote_code=True,
             token=token,
         )
         ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
@@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
         model = "superb/wav2vec2-base-superb-ks"

         audio_classifier = pipeline("audio-classification", model=model)
-        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
+        dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)

         audio = np.array(dataset[3]["speech"], dtype=np.float32)
         output = audio_classifier(audio, top_k=4)
@@ -265,7 +265,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @require_torch
     @require_pyctcdecode
     def test_large_model_pt_with_lm(self):
-        filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")
+        dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
+        third_item = next(iter(dataset["test"].skip(3)))
+        filename = third_item["file"]

         speech_recognizer = pipeline(
             task="automatic-speech-recognition",
@@ -386,7 +388,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             chunk_length_s=8,
             stride_length_s=1,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(data))

         res = pipe(sample["audio"]["array"])
@@ -432,7 +434,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             stride_length_s=1,
             return_language=True,
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         sample = next(iter(data))

         res = pipe(sample["audio"]["array"])
@@ -487,7 +489,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
             task="automatic-speech-recognition",
             model="openai/whisper-tiny.en",
         )
-        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
+        data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
         samples = [next(iter(data)) for _ in range(8)]
         audio = np.concatenate([sample["audio"]["array"] for sample in samples])

@@ -1123,7 +1125,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_non_distil(self):
         # Load data:
-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         # Load model:
@@ -1165,7 +1169,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
     @slow
     def test_speculative_decoding_whisper_distil(self):
         # Load data:
-        dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
+        dataset = load_dataset(
+            "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
+        )
         sample = dataset[0]["audio"]

         # Load model:
@@ -601,7 +601,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):

         image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)

-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
         file = image[0]["file"]
         outputs = image_segmenter(file, threshold=threshold)

@@ -655,7 +655,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
     def test_oneformer(self):
         image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")

-        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
+        image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
         file = image[0]["file"]
         outputs = image_segmenter(file, threshold=0.99)
         # Shortening by hashing
@@ -3799,20 +3799,8 @@ class ModelTesterMixin:
             self.skipTest(reason="Idefics currently (transformers==4.39.1) requires an image_attention_mask input")
         if config.model_type in ["sam"]:
             self.skipTest(reason="SAM requires an attention_mask input for relative positional embeddings")

         model = model_class(config)
-
-        sub_models_supporting_sdpa = [
-            module._supports_sdpa
-            for name, module in model.named_modules()
-            if isinstance(module, PreTrainedModel) and name != ""
-        ]
-        supports_sdpa_all_modules = (
-            all(sub_models_supporting_sdpa) if len(sub_models_supporting_sdpa) > 0 else model._supports_sdpa
-        )
-        if not supports_sdpa_all_modules:
-            self.skipTest(reason="This models' submodels does not support sdpa")
-
         with tempfile.TemporaryDirectory() as tmpdirname:
             model.save_pretrained(tmpdirname)
             model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa")
@@ -3860,20 +3848,8 @@ class ModelTesterMixin:
                 "Cannot compile forward without an existing cache with Hybrid, as `torch._dynamo.mark_static_address` "
                 "is a forbidden call."
             )

         model = model_class(config)
-
-        sub_models_supporting_sdpa = [
-            module._supports_sdpa
-            for name, module in model.named_modules()
-            if isinstance(module, PreTrainedModel) and name != ""
-        ]
-        supports_sdpa_all_modules = (
-            all(sub_models_supporting_sdpa) if len(sub_models_supporting_sdpa) > 0 else model._supports_sdpa
-        )
-        if not supports_sdpa_all_modules:
-            self.skipTest(reason="This models' submodels does not support sdpa")
-
         with tempfile.TemporaryDirectory() as tmpdirname:
             model.save_pretrained(tmpdirname)
             model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa")
@@ -1792,25 +1792,6 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         self.assertEqual(modeling_llama.apply_rotary_pos_emb, liger_rotary_pos_emb)
         self.assertTrue(isinstance(tiny_llama.model.norm, LigerRMSNorm))

-    @require_liger_kernel
-    def test_use_liger_kernel_custom_config_patching(self):
-        # Ensure any monkey patching is cleaned up for subsequent tests
-        with patch("transformers.models.llama.modeling_llama"):
-            from liger_kernel.transformers import LigerRMSNorm
-
-            config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
-            tiny_llama = LlamaForCausalLM(config)
-
-            args = TrainingArguments(
-                self.get_auto_remove_tmp_dir(),
-                use_liger_kernel=True,
-                liger_kernel_config={"rms_norm": False},  # Don't apply Liger's RMSNorm
-            )
-            Trainer(tiny_llama, args)
-
-            # Check that the RMSNorm kernel is not applied as specified in the config
-            self.assertFalse(isinstance(tiny_llama.model.norm, LigerRMSNorm))
-
     @require_liger_kernel
     @require_torch_accelerator
     def test_use_liger_kernel_trainer(self):
@@ -1829,29 +1810,6 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         # Check this works
         _ = trainer.train()

-    @require_liger_kernel
-    @require_torch_accelerator
-    def test_use_liger_kernel_custom_config_trainer(self):
-        # Check that trainer still works with liger kernel applied when using a custom config
-        config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
-        tiny_llama = LlamaForCausalLM(config)
-
-        x = torch.randint(0, 100, (128,))
-        train_dataset = RepeatDataset(x)
-
-        args = TrainingArguments(
-            self.get_auto_remove_tmp_dir(),
-            learning_rate=1e-2,
-            logging_steps=5,
-            max_steps=20,
-            use_liger_kernel=True,
-            liger_kernel_config={"rms_norm": False, "cross_entropy": True, "fused_linear_cross_entropy": False},
-        )
-        trainer = Trainer(tiny_llama, args, train_dataset=train_dataset)
-
-        # Check this works
-        _ = trainer.train()
-
     @require_lomo
     @require_torch_accelerator
     def test_lomo(self):
@@ -133,19 +133,10 @@ if __name__ == "__main__":
     # Assuming there is a topological sort on the dependency mapping: if the file being checked and its dependencies
     # are not in the diff, then there it is guaranteed to have no differences. If no models are in the diff, then this
     # script will do nothing.
-    current_branch = subprocess.check_output(["git", "branch", "--show-current"], text=True).strip()
-    if current_branch == "main":
-        console.print(
-            "[bold red]You are developing on the main branch. We cannot identify the list of changed files and will have to check all files. This may take a while.[/bold red]"
-        )
-        models_in_diff = {file_path.split("/")[-2] for file_path in args.files}
-    else:
-        models_in_diff = get_models_in_diff()
-        if not models_in_diff:
-            console.print(
-                "[bold green]No models files or model tests in the diff, skipping modular checks[/bold green]"
-            )
-            exit(0)
+    models_in_diff = get_models_in_diff()
+    if not models_in_diff:
+        console.print("[bold green]No models files or model tests in the diff, skipping modular checks[/bold green]")
+        exit(0)

     skipped_models = set()
     non_matching_files = 0
@@ -158,8 +149,7 @@ if __name__ == "__main__":
                 skipped_models.add(model_name)
                 continue
             non_matching_files += compare_files(modular_file_path, args.fix_and_overwrite)
-            if current_branch != "main":
-                models_in_diff = get_models_in_diff()  # When overwriting, the diff changes
+            models_in_diff = get_models_in_diff()  # When overwriting, the diff changes
     else:
         new_ordered_files = []
         for modular_file_path in ordered_files:
utils/get_runner_map.py (new file, 59 lines added)
@@ -0,0 +1,59 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This script is used to get a map containing the information of runners to use in GitHub Actions workflow files.
+This is meant to be a temporary file that helps us to switch progressively from T4 to A10 runners.
+
+The data is stored in a Hub repository [hf-internal-testing/transformers_daily_ci](https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/blob/main/runner_map.json).
+Currently, in that file, we specify the models for which we want to run the tests with T4 runners to avoid many test failures showing on the CI reports.
+We will work on the tests toward to use A10 for all CI jobs.
+"""
+
+import os
+import requests
+
+
+if __name__ == "__main__":
+
+    # T4
+    t4_runners = {
+        "single-gpu": "aws-g4dn-4xlarge-cache",
+        "multi-gpu": "aws-g4dn-12xlarge-cache",
+    }
+
+    # A10
+    a10_runners = {
+        "single-gpu": "aws-g5-4xlarge-cache",
+        "multi-gpu": "aws-g5-12xlarge-cache",
+    }
+
+    tests = os.getcwd()
+    model_tests = os.listdir(os.path.join(tests, "models"))
+    d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
+    d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))
+    d1.remove("models")
+    d = d2 + d1
+
+    response = requests.get("https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/resolve/main/runner_map.json")
+    # The models that we want to run with T4 runners
+    runner_map = response.json()
+
+    for key in d:
+        if key in runner_map:
+            runner_map[key] = t4_runners
+        else:
+            runner_map[key] = a10_runners
+
+    print(runner_map)
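Note (not part of the diff): the script above prints a plain Python dict keyed by test folder, with one runner group per machine type. A minimal sketch of the kind of lookup a consumer of that printed map could perform is below; the folder keys and the pick_runner helper are illustrative assumptions, not code taken from this repository or its workflow files.

# Illustrative sketch only: the keys are hypothetical test folders; the values mirror
# the T4/A10 runner groups defined in utils/get_runner_map.py above.
runner_map = {
    "models/vit": {"single-gpu": "aws-g4dn-4xlarge-cache", "multi-gpu": "aws-g4dn-12xlarge-cache"},
    "generation": {"single-gpu": "aws-g5-4xlarge-cache", "multi-gpu": "aws-g5-12xlarge-cache"},
}


def pick_runner(folder: str, machine_type: str) -> str:
    # A workflow consuming the printed map would perform an equivalent per-folder,
    # per-machine-type lookup (an assumption, not quoted from the workflow).
    return runner_map[folder][machine_type]


print(pick_runner("models/vit", "single-gpu"))  # -> aws-g4dn-4xlarge-cache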
@@ -1494,7 +1494,7 @@ if __name__ == "__main__":
         other_ci_artifacts=other_ci_artifacts,
     )

-    # send report only if there is any failure (for push CI)
-    if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):
-        message.post()
-        message.post_reply()
+    # # send report only if there is any failure (for push CI)
+    # if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):
+    #     message.post()
+    #     message.post_reply()
@@ -62,4 +62,5 @@ if __name__ == "__main__":
         start = end
         end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
         model_splits.append(d[start:end])
+    model_splits = [['models/vit', 'generation'], ['models/clip', 'models/vits']]
     print(model_splits)