Compare commits


21 Commits

Author SHA1 Message Date
6d38d27ef3 trigger for Remove script datasets in tests #38940 2025-06-20 20:12:47 +02:00
20c0f8bc77 pick 2025-06-20 20:10:35 +02:00
9b2afaf02d fix integration test 2025-06-20 19:54:27 +02:00
d188134b95 style 2025-06-20 17:43:26 +02:00
e2ed15c465 again 2025-06-20 17:42:17 +02:00
005459827e again 2025-06-20 14:44:38 +02:00
69419a4935 style 2025-06-20 14:37:26 +02:00
1fdb9f3908 again 2025-06-20 13:19:50 +02:00
3dfebf2fc0 Revert "Skip some tests for now (#38931)"
This reverts commit 31d30b72245aacfdf70249165964b53790d9c4d8.
2025-06-20 13:00:47 +02:00
e6093deb18 again 2025-06-20 13:00:25 +02:00
b7ec09c2f4 remove trust_remote_code 2025-06-20 13:00:25 +02:00
aa42987c1e Remove ALL_LAYERNORM_LAYERS (#38922)
* remove it everywhere

* Update trainer_pt_utils.py

* Update trainer_pt_utils.py

* style

* sort list in test

* CIs

* use recursion same way as before (for intermediate layer names)
2025-06-20 12:06:48 +02:00
38a9b70786 add pytorch-xpu Dockerfile (#38875)
* first commit

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* use rls pytorch

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: YAO Matrix <matrix.yao@intel.com>
2025-06-20 11:42:44 +02:00
9bcdd5cde9 Modernbert fixes (#38912)
* Removed deprecated argument in modernbert RotaryEmbedding

* Skip test_sdpa_can_dispatch_on_flash for modernbert

---------

Co-authored-by: ivarflakstad <69173633+ivarflakstad@users.noreply.github.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-06-20 11:22:32 +02:00
31d30b7224 Skip some tests for now (#38931)
* try

* [test all]

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-06-20 11:05:49 +02:00
0725cd6953 Remove deprecated classes in modeling_utils.py (#38919)
* remove deprecated classes

* style
2025-06-19 19:25:20 +02:00
797860c68c feat: add flexible Liger Kernel configuration to TrainingArguments (#38911)
* feat: add flexible Liger Kernel configuration to TrainingArguments

Add support for granular Liger Kernel configuration through a new
`liger_kernel_config` parameter in TrainingArguments. This allows users
to selectively enable/disable specific kernels (rope, swiglu, cross_entropy,
etc.) instead of the current approach that relies on the default configuration.

Features:
- Add `liger_kernel_config` dict parameter to TrainingArguments
- Support selective kernel application for all supported models
- Maintain full backward compatibility with existing `use_liger_kernel` flag

Example usage:
```python
TrainingArguments(
    use_liger_kernel=True,
    liger_kernel_config={
        "rope": True,
        "swiglu": True,
        "cross_entropy": False,
        "fused_linear_cross_entropy": True
    }
)
```

Closes #38905

* Address comments and update Liger section in Trainer docs
2025-06-19 15:54:08 +00:00
89b35be618 Allow make-fixup on main branch, albeit slowly (#38892)
* Allow make-fixup on main branch, albeit slowly

* Make the other style checks work correctly on main too

* More update

* More makefile update
2025-06-19 15:22:59 +01:00
9a02e7602d feat: Add granite architectures to auto tokenizer name mappings (#38802)
Branch: GraniteTokenizerMapping

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
2025-06-19 15:20:42 +01:00
54a02160eb Fix ReDOS in tokenizer digit substitution (#38844)
* Fix regexes vulnerable to ReDOS

* Let's just use regex

* Import regex/re correctly
2025-06-19 14:53:52 +01:00
af6120b3eb Skip sdpa tests if submodule does not support sdpa (#38907) 2025-06-19 13:11:01 +00:00
65 changed files with 430 additions and 822 deletions

View File

@ -12,8 +12,8 @@ on:
slice_id:
required: true
type: number
runner_map:
required: false
runner:
required: true
type: string
docker:
required: true
@ -45,7 +45,7 @@ jobs:
matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
runs-on:
group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
group: '${{ inputs.machine_type }}'
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/

.github/workflows/model_jobs_amd.yml vendored Normal file (128 lines)
View File

@ -0,0 +1,128 @@
name: model jobs
on:
workflow_call:
inputs:
folder_slices:
required: true
type: string
machine_type:
required: true
type: string
slice_id:
required: true
type: number
runner:
required: true
type: string
docker:
required: true
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
jobs:
run_models_gpu:
name: " "
strategy:
max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
fail-fast: false
matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ inputs.folder_slices }}"
echo "${{ matrix.folders }}"
echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') }}
working-directory: /transformers
run: |
python3 -m pip install -U datasets
- name: Update / Install some packages (for Past CI)
if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
working-directory: /transformers
run: |
python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Run test
shell: bash
run: |
mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

View File

@ -7,7 +7,7 @@ on:
- cron: "17 2 * * *"
push:
branches:
- allow_ci_to_use_a10
- trigger-remove-script-datasets-in-tests
workflow_dispatch:
inputs:
prev_workflow_run_id:
@ -22,10 +22,10 @@ on:
default: ""
# Used for `push` to easily modify the target workflow runs to compare against
# Used for `push` to easily modiffy the target workflow runs to compare against
env:
prev_workflow_run_id: ""
other_workflow_run_id: ""
other_workflow_run_id: "15770139098"
jobs:
@ -51,68 +51,8 @@ jobs:
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-models"
runner: daily-ci
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
secrets: inherit
# torch-pipeline:
# name: Torch pipeline CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_pipelines_torch_gpu
# slack_report_channel: "#transformers-ci-daily-pipeline-torch"
# runner: daily-ci
# docker: huggingface/transformers-pytorch-gpu
# ci_event: Daily CI
# report_repo_id: hf-internal-testing/transformers_daily_ci
# secrets: inherit
#
# example-ci:
# name: Example CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_examples_gpu
# slack_report_channel: "#transformers-ci-daily-examples"
# runner: daily-ci
# docker: huggingface/transformers-all-latest-gpu
# ci_event: Daily CI
# report_repo_id: hf-internal-testing/transformers_daily_ci
# secrets: inherit
#
# trainer-fsdp-ci:
# name: Trainer/FSDP CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_trainer_and_fsdp_gpu
# slack_report_channel: "#transformers-ci-daily-training"
# runner: daily-ci
# docker: huggingface/transformers-all-latest-gpu
# ci_event: Daily CI
# report_repo_id: hf-internal-testing/transformers_daily_ci
# secrets: inherit
#
# deepspeed-ci:
# name: DeepSpeed CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_torch_cuda_extensions_gpu
# slack_report_channel: "#transformers-ci-daily-training"
# runner: daily-ci
# docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
# ci_event: Daily CI
# working-directory-prefix: /workspace
# report_repo_id: hf-internal-testing/transformers_daily_ci
# secrets: inherit
#
# quantization-ci:
# name: Quantization CI
# uses: ./.github/workflows/self-scheduled.yml
# with:
# job: run_quantization_torch_gpu
# slack_report_channel: "#transformers-ci-daily-quantization"
# runner: daily-ci
# docker: huggingface/transformers-quantization-latest-gpu
# ci_event: Daily CI
# report_repo_id: hf-internal-testing/transformers_daily_ci
# secrets: inherit

View File

@ -15,6 +15,9 @@ on:
slack_report_channel:
required: true
type: string
runner:
required: true
type: string
docker:
required: true
type: string
@ -59,7 +62,6 @@ jobs:
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
runner_map: ${{ steps.set-matrix.outputs.runner_map }}
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
steps:
- name: Update clone
@ -86,7 +88,6 @@ jobs:
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
@ -110,14 +111,14 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs.yml
with:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner_map: ${{ needs.setup.outputs.runner_map }}
runner: ${{ inputs.runner }}
docker: ${{ inputs.docker }}
secrets: inherit
@ -135,6 +136,7 @@ jobs:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner: ${{ inputs.runner }}
docker: ${{ inputs.docker }}
report_name_prefix: run_trainer_and_fsdp_gpu
secrets: inherit

View File

@ -8,13 +8,19 @@ check_dirs := examples tests src utils
exclude_folders := ""
modified_only_fixup:
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo "Checking/fixing $(modified_py_files)"; \
ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
ruff format $(modified_py_files) --exclude $(exclude_folders);\
@current_branch=$$(git branch --show-current); \
if [ "$$current_branch" = "main" ]; then \
echo "On main branch, running 'style' target instead..."; \
$(MAKE) style; \
else \
echo "No library .py files were modified"; \
modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
if [ -n "$$modified_py_files" ]; then \
echo "Checking/fixing files: $${modified_py_files}"; \
ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
ruff format $${modified_py_files} --exclude $(exclude_folders); \
else \
echo "No library .py files were modified"; \
fi; \
fi
# Update src/transformers/dependency_versions_table.py

View File

@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:
```python
>>> # let's load an audio sample from an Arabic speech corpus
>>> from datasets import load_dataset
>>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
>>> audio_sample = next(iter(dataset))["audio"]
>>> # now, process it

View File

@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:
```python
>>> # let's load an audio sample from an Arabic speech corpus
>>> from datasets import load_dataset
>>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
>>> audio_sample = next(iter(dataset))["audio"]
>>> # now, process it

View File

@ -493,6 +493,33 @@ training_args = TrainingArguments(
)
```
You can also configure which specific kernels to apply using the `liger_kernel_config` parameter. This dict is passed as keyword arguments to the `_apply_liger_kernel_to_instance` function, allowing fine-grained control over kernel usage. Available options vary by model but typically include: `rope`, `swiglu`, `cross_entropy`, `fused_linear_cross_entropy`, `rms_norm`, etc.
```py
from transformers import TrainingArguments
# Apply only specific kernels
training_args = TrainingArguments(
output_dir="your-model",
learning_rate=2e-5,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
num_train_epochs=2,
weight_decay=0.01,
eval_strategy="epoch",
save_strategy="epoch",
load_best_model_at_end=True,
push_to_hub=True,
use_liger_kernel=True,
liger_kernel_config={
"rope": True,
"cross_entropy": True,
"rms_norm": False, # Don't apply Liger's RMSNorm kernel
"swiglu": True,
}
)
```
### NEFTune
[NEFTune](https://hf.co/papers/2310.05914) adds noise to the embedding vectors during training to improve model performance. Enable it in [`Trainer`] with the `neftune_noise_alpha` parameter in [`TrainingArguments`] to control how much noise is added.
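
Below is a minimal sketch of turning NEFTune on; the other arguments are illustrative placeholders, and `neftune_noise_alpha` is the only NEFTune-specific setting.

```py
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="your-model",          # illustrative placeholder
    per_device_train_batch_size=16,   # illustrative placeholder
    num_train_epochs=2,               # illustrative placeholder
    neftune_noise_alpha=5.0,          # scale of the noise added to embedding vectors during training
)
```

The noise is only applied while training; the Trainer removes the hook afterwards, so evaluation and inference are unaffected.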

View File

@ -264,7 +264,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config clean
--train_split_name validation
--eval_split_name validation
--trust_remote_code
--output_dir {tmp_dir}
--overwrite_output_dir
--num_train_epochs=2

View File

@ -312,7 +312,6 @@ class ExamplesTestsNoTrainer(TestCasePlus):
{self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
--model_name_or_path google/vit-base-patch16-224-in21k
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--trust_remote_code
--learning_rate 1e-4
--per_device_train_batch_size 2
--per_device_eval_batch_size 1

View File

@ -390,7 +390,6 @@ class ExamplesTests(TestCasePlus):
--output_dir {tmp_dir}
--model_name_or_path google/vit-base-patch16-224-in21k
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@ -424,7 +423,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config_name clean
--train_split_name validation
--eval_split_name validation
--trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@ -455,7 +453,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config_name clean
--train_split_name validation
--eval_split_name validation
--trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@ -488,7 +485,6 @@ class ExamplesTests(TestCasePlus):
--dataset_config_name clean
--train_split_name validation
--eval_split_name validation
--trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4
@ -516,7 +512,6 @@ class ExamplesTests(TestCasePlus):
--output_dir {tmp_dir}
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
--dataset_name anton-l/superb_demo
--trust_remote_code
--dataset_config_name ks
--train_split_name test
--eval_split_name test
@ -551,7 +546,6 @@ class ExamplesTests(TestCasePlus):
--dataset_name hf-internal-testing/librispeech_asr_dummy
--dataset_config_names clean
--dataset_split_names validation
--trust_remote_code
--learning_rate 1e-4
--per_device_train_batch_size 4
--per_device_eval_batch_size 4
@ -572,7 +566,6 @@ class ExamplesTests(TestCasePlus):
run_mae.py
--output_dir {tmp_dir}
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--trust_remote_code
--do_train
--do_eval
--learning_rate 1e-4

View File

@ -315,7 +315,6 @@ class ExamplesTests(TestCasePlus):
testargs = f"""
run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--trust_remote_code
--model_name_or_path microsoft/resnet-18
--do_train
--do_eval

View File

@ -29,7 +29,6 @@ import warnings
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import contextmanager
from dataclasses import dataclass
from enum import Enum
from functools import partial, wraps
from threading import Thread
@ -41,7 +40,6 @@ from huggingface_hub import split_torch_state_dict_into_shards
from packaging import version
from torch import Tensor, nn
from torch.distributions import constraints
from torch.nn import CrossEntropyLoss, Identity
from torch.utils.checkpoint import checkpoint
from transformers.utils import is_torchao_available
@ -50,7 +48,6 @@ from transformers.utils import is_torchao_available
if is_torchao_available():
from torchao.quantization import Int4WeightOnlyConfig
from .activations import get_activation
from .configuration_utils import PretrainedConfig
from .dynamic_module_utils import custom_object_save
from .generation import CompileConfig, GenerationConfig
@ -98,7 +95,6 @@ from .utils import (
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
ContextManagers,
ModelOutput,
PushToHubMixin,
cached_file,
check_torch_load_is_safe,
@ -123,7 +119,6 @@ from .utils import (
is_torch_xla_available,
is_torch_xpu_available,
logging,
replace_return_docstrings,
strtobool,
)
from .utils.generic import GeneralInterface
@ -5624,453 +5619,6 @@ if PreTrainedModel.push_to_hub.__doc__ is not None:
)
class PoolerStartLogits(nn.Module):
"""
Compute SQuAD start logits from sequence hidden states.
Args:
config ([`PretrainedConfig`]):
The config used by the model, will be used to grab the `hidden_size` of the model.
"""
def __init__(self, config: PretrainedConfig):
super().__init__()
self.dense = nn.Linear(config.hidden_size, 1)
logger.warning_once(
"[DEPRECATION WARNING] `PoolerStartLogits` is deprecated and will be removed in v4.53. "
"Please use model-specific class, e.g. `XLMPoolerStartLogits`."
)
def forward(
self, hidden_states: torch.FloatTensor, p_mask: Optional[torch.FloatTensor] = None
) -> torch.FloatTensor:
"""
Args:
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
The final hidden states of the model.
p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
should be masked.
Returns:
`torch.FloatTensor`: The start logits for SQuAD.
"""
x = self.dense(hidden_states).squeeze(-1)
if p_mask is not None:
if get_parameter_dtype(self) == torch.float16:
x = x * (1 - p_mask) - 65500 * p_mask
else:
x = x * (1 - p_mask) - 1e30 * p_mask
return x
class PoolerEndLogits(nn.Module):
"""
Compute SQuAD end logits from sequence hidden states.
Args:
config ([`PretrainedConfig`]):
The config used by the model, will be used to grab the `hidden_size` of the model and the `layer_norm_eps`
to use.
"""
def __init__(self, config: PretrainedConfig):
super().__init__()
self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
self.activation = nn.Tanh()
self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
self.dense_1 = nn.Linear(config.hidden_size, 1)
logger.warning_once(
"[DEPRECATION WARNING] `PoolerEndLogits` is deprecated and will be removed in v4.53. "
"Please use model-specific class, e.g. `XLMPoolerEndLogits`."
)
def forward(
self,
hidden_states: torch.FloatTensor,
start_states: Optional[torch.FloatTensor] = None,
start_positions: Optional[torch.LongTensor] = None,
p_mask: Optional[torch.FloatTensor] = None,
) -> torch.FloatTensor:
"""
Args:
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
The final hidden states of the model.
start_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*):
The hidden states of the first tokens for the labeled span.
start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
The position of the first token for the labeled span.
p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
should be masked.
<Tip>
One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
`start_states`.
</Tip>
Returns:
`torch.FloatTensor`: The end logits for SQuAD.
"""
assert start_states is not None or start_positions is not None, (
"One of start_states, start_positions should be not None"
)
if start_positions is not None:
slen, hsz = hidden_states.shape[-2:]
start_positions = start_positions[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz)
start_states = hidden_states.gather(-2, start_positions) # shape (bsz, 1, hsz)
start_states = start_states.expand(-1, slen, -1) # shape (bsz, slen, hsz)
x = self.dense_0(torch.cat([hidden_states, start_states], dim=-1))
x = self.activation(x)
x = self.LayerNorm(x)
x = self.dense_1(x).squeeze(-1)
if p_mask is not None:
if get_parameter_dtype(self) == torch.float16:
x = x * (1 - p_mask) - 65500 * p_mask
else:
x = x * (1 - p_mask) - 1e30 * p_mask
return x
class PoolerAnswerClass(nn.Module):
"""
Compute SQuAD 2.0 answer class from classification and start tokens hidden states.
Args:
config ([`PretrainedConfig`]):
The config used by the model, will be used to grab the `hidden_size` of the model.
"""
def __init__(self, config):
super().__init__()
self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
self.activation = nn.Tanh()
self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
logger.warning_once(
"[DEPRECATION WARNING] `PoolerAnswerClass` is deprecated and will be removed in v4.53. "
"Please use model-specific class, e.g. `XLMPoolerAnswerClass`."
)
def forward(
self,
hidden_states: torch.FloatTensor,
start_states: Optional[torch.FloatTensor] = None,
start_positions: Optional[torch.LongTensor] = None,
cls_index: Optional[torch.LongTensor] = None,
) -> torch.FloatTensor:
"""
Args:
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
The final hidden states of the model.
start_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*):
The hidden states of the first tokens for the labeled span.
start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
The position of the first token for the labeled span.
cls_index (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Position of the CLS token for each sentence in the batch. If `None`, takes the last token.
<Tip>
One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
`start_states`.
</Tip>
Returns:
`torch.FloatTensor`: The SQuAD 2.0 answer class.
"""
# No dependency on end_feature so that we can obtain one single `cls_logits` for each sample.
hsz = hidden_states.shape[-1]
assert start_states is not None or start_positions is not None, (
"One of start_states, start_positions should be not None"
)
if start_positions is not None:
start_positions = start_positions[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz)
start_states = hidden_states.gather(-2, start_positions).squeeze(-2) # shape (bsz, hsz)
if cls_index is not None:
cls_index = cls_index[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz)
cls_token_state = hidden_states.gather(-2, cls_index).squeeze(-2) # shape (bsz, hsz)
else:
cls_token_state = hidden_states[:, -1, :] # shape (bsz, hsz)
x = self.dense_0(torch.cat([start_states, cls_token_state], dim=-1))
x = self.activation(x)
x = self.dense_1(x).squeeze(-1)
return x
@dataclass
class SquadHeadOutput(ModelOutput):
"""
Base class for outputs of question answering models using a [`~modeling_utils.SQuADHead`].
Args:
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned if both `start_positions` and `end_positions` are provided):
Classification loss as the sum of start token, end token (and is_impossible if provided) classification
losses.
start_top_log_probs (`torch.FloatTensor` of shape `(batch_size, config.start_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
Log probabilities for the top config.start_n_top start token possibilities (beam-search).
start_top_index (`torch.LongTensor` of shape `(batch_size, config.start_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
Indices for the top config.start_n_top start token possibilities (beam-search).
end_top_log_probs (`torch.FloatTensor` of shape `(batch_size, config.start_n_top * config.end_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
Log probabilities for the top `config.start_n_top * config.end_n_top` end token possibilities
(beam-search).
end_top_index (`torch.LongTensor` of shape `(batch_size, config.start_n_top * config.end_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
Indices for the top `config.start_n_top * config.end_n_top` end token possibilities (beam-search).
cls_logits (`torch.FloatTensor` of shape `(batch_size,)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
Log probabilities for the `is_impossible` label of the answers.
"""
loss: Optional[torch.FloatTensor] = None
start_top_log_probs: Optional[torch.FloatTensor] = None
start_top_index: Optional[torch.LongTensor] = None
end_top_log_probs: Optional[torch.FloatTensor] = None
end_top_index: Optional[torch.LongTensor] = None
cls_logits: Optional[torch.FloatTensor] = None
def __post_init__(self):
logger.warning_once(
"[DEPRECATION WARNING] `SquadHeadOutput` is deprecated and will be removed in v4.53. "
"Please use model-specific class, e.g. `XLMSquadHeadOutput`."
)
class SQuADHead(nn.Module):
r"""
A SQuAD head inspired by XLNet.
Args:
config ([`PretrainedConfig`]):
The config used by the model, will be used to grab the `hidden_size` of the model and the `layer_norm_eps`
to use.
"""
def __init__(self, config):
super().__init__()
self.start_n_top = config.start_n_top
self.end_n_top = config.end_n_top
self.start_logits = PoolerStartLogits(config)
self.end_logits = PoolerEndLogits(config)
self.answer_class = PoolerAnswerClass(config)
logger.warning_once(
"[DEPRECATION WARNING] `SQuADHead` is deprecated and will be removed in v4.53. "
"Please use model-specific class, e.g. `XLMSQuADHead`."
)
@replace_return_docstrings(output_type=SquadHeadOutput, config_class=PretrainedConfig)
def forward(
self,
hidden_states: torch.FloatTensor,
start_positions: Optional[torch.LongTensor] = None,
end_positions: Optional[torch.LongTensor] = None,
cls_index: Optional[torch.LongTensor] = None,
is_impossible: Optional[torch.LongTensor] = None,
p_mask: Optional[torch.FloatTensor] = None,
return_dict: bool = False,
) -> Union[SquadHeadOutput, tuple[torch.FloatTensor]]:
"""
Args:
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
Final hidden states of the model on the sequence tokens.
start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Positions of the first token for the labeled span.
end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Positions of the last token for the labeled span.
cls_index (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Position of the CLS token for each sentence in the batch. If `None`, takes the last token.
is_impossible (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
Whether the question has a possible answer in the paragraph or not.
p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
should be masked.
return_dict (`bool`, *optional*, defaults to `False`):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
Returns:
"""
start_logits = self.start_logits(hidden_states, p_mask=p_mask)
if start_positions is not None and end_positions is not None:
# If we are on multi-GPU, let's remove the dimension added by batch splitting
for x in (start_positions, end_positions, cls_index, is_impossible):
if x is not None and x.dim() > 1:
x.squeeze_(-1)
# during training, compute the end logits based on the ground truth of the start position
end_logits = self.end_logits(hidden_states, start_positions=start_positions, p_mask=p_mask)
loss_fct = CrossEntropyLoss()
start_loss = loss_fct(start_logits, start_positions)
end_loss = loss_fct(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2
if cls_index is not None and is_impossible is not None:
# Predict answerability from the representation of CLS and START
cls_logits = self.answer_class(hidden_states, start_positions=start_positions, cls_index=cls_index)
loss_fct_cls = nn.BCEWithLogitsLoss()
cls_loss = loss_fct_cls(cls_logits, is_impossible)
# note(zhiliny): by default multiply the loss by 0.5 so that the scale is comparable to start_loss and end_loss
total_loss += cls_loss * 0.5
return SquadHeadOutput(loss=total_loss) if return_dict else (total_loss,)
else:
# during inference, compute the end logits based on beam search
bsz, slen, hsz = hidden_states.size()
start_log_probs = nn.functional.softmax(start_logits, dim=-1) # shape (bsz, slen)
start_top_log_probs, start_top_index = torch.topk(
start_log_probs, self.start_n_top, dim=-1
) # shape (bsz, start_n_top)
start_top_index_exp = start_top_index.unsqueeze(-1).expand(-1, -1, hsz) # shape (bsz, start_n_top, hsz)
start_states = torch.gather(hidden_states, -2, start_top_index_exp) # shape (bsz, start_n_top, hsz)
start_states = start_states.unsqueeze(1).expand(-1, slen, -1, -1) # shape (bsz, slen, start_n_top, hsz)
hidden_states_expanded = hidden_states.unsqueeze(2).expand_as(
start_states
) # shape (bsz, slen, start_n_top, hsz)
p_mask = p_mask.unsqueeze(-1) if p_mask is not None else None
end_logits = self.end_logits(hidden_states_expanded, start_states=start_states, p_mask=p_mask)
end_log_probs = nn.functional.softmax(end_logits, dim=1) # shape (bsz, slen, start_n_top)
end_top_log_probs, end_top_index = torch.topk(
end_log_probs, self.end_n_top, dim=1
) # shape (bsz, end_n_top, start_n_top)
end_top_log_probs = end_top_log_probs.view(-1, self.start_n_top * self.end_n_top)
end_top_index = end_top_index.view(-1, self.start_n_top * self.end_n_top)
start_states = torch.einsum("blh,bl->bh", hidden_states, start_log_probs)
cls_logits = self.answer_class(hidden_states, start_states=start_states, cls_index=cls_index)
if not return_dict:
return (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits)
else:
return SquadHeadOutput(
start_top_log_probs=start_top_log_probs,
start_top_index=start_top_index,
end_top_log_probs=end_top_log_probs,
end_top_index=end_top_index,
cls_logits=cls_logits,
)
class SequenceSummary(nn.Module):
r"""
Compute a single vector summary of a sequence hidden states.
Args:
config ([`PretrainedConfig`]):
The config used by the model. Relevant arguments in the config class of the model are (refer to the actual
config class of your model for the default values it uses):
- **summary_type** (`str`) -- The method to use to make this summary. Accepted values are:
- `"last"` -- Take the last token hidden state (like XLNet)
- `"first"` -- Take the first token hidden state (like Bert)
- `"mean"` -- Take the mean of all tokens hidden states
- `"cls_index"` -- Supply a Tensor of classification token position (GPT/GPT-2)
- `"attn"` -- Not implemented now, use multi-head attention
- **summary_use_proj** (`bool`) -- Add a projection after the vector extraction.
- **summary_proj_to_labels** (`bool`) -- If `True`, the projection outputs to `config.num_labels` classes
(otherwise to `config.hidden_size`).
- **summary_activation** (`Optional[str]`) -- Set to `"tanh"` to add a tanh activation to the output,
another string or `None` will add no activation.
- **summary_first_dropout** (`float`) -- Optional dropout probability before the projection and activation.
- **summary_last_dropout** (`float`)-- Optional dropout probability after the projection and activation.
"""
def __init__(self, config: PretrainedConfig):
super().__init__()
self.summary_type = getattr(config, "summary_type", "last")
if self.summary_type == "attn":
# We should use a standard multi-head attention module with absolute positional embedding for that.
# Cf. https://github.com/zihangdai/xlnet/blob/master/modeling.py#L253-L276
# We can probably just use the multi-head attention module of PyTorch >=1.1.0
raise NotImplementedError
self.summary = Identity()
if hasattr(config, "summary_use_proj") and config.summary_use_proj:
if hasattr(config, "summary_proj_to_labels") and config.summary_proj_to_labels and config.num_labels > 0:
num_classes = config.num_labels
else:
num_classes = config.hidden_size
self.summary = nn.Linear(config.hidden_size, num_classes)
activation_string = getattr(config, "summary_activation", None)
self.activation: Callable = get_activation(activation_string) if activation_string else Identity()
self.first_dropout = Identity()
if hasattr(config, "summary_first_dropout") and config.summary_first_dropout > 0:
self.first_dropout = nn.Dropout(config.summary_first_dropout)
self.last_dropout = Identity()
if hasattr(config, "summary_last_dropout") and config.summary_last_dropout > 0:
self.last_dropout = nn.Dropout(config.summary_last_dropout)
logger.warning_once(
"[DEPRECATION WARNING] `SequenceSummary` is deprecated and will be removed in v4.53. "
"Please use model-specific class, e.g. `XLMSequenceSummary`."
)
def forward(
self, hidden_states: torch.FloatTensor, cls_index: Optional[torch.LongTensor] = None
) -> torch.FloatTensor:
"""
Compute a single vector summary of a sequence hidden states.
Args:
hidden_states (`torch.FloatTensor` of shape `[batch_size, seq_len, hidden_size]`):
The hidden states of the last layer.
cls_index (`torch.LongTensor` of shape `[batch_size]` or `[batch_size, ...]` where ... are optional leading dimensions of `hidden_states`, *optional*):
Used if `summary_type == "cls_index"` and takes the last token of the sequence as classification token.
Returns:
`torch.FloatTensor`: The summary of the sequence hidden states.
"""
if self.summary_type == "last":
output = hidden_states[:, -1]
elif self.summary_type == "first":
output = hidden_states[:, 0]
elif self.summary_type == "mean":
output = hidden_states.mean(dim=1)
elif self.summary_type == "cls_index":
if cls_index is None:
cls_index = torch.full_like(
hidden_states[..., :1, :],
hidden_states.shape[-2] - 1,
dtype=torch.long,
)
else:
cls_index = cls_index.unsqueeze(-1).unsqueeze(-1)
cls_index = cls_index.expand((-1,) * (cls_index.dim() - 1) + (hidden_states.size(-1),))
# shape of cls_index: (bsz, XX, 1, hidden_size) where XX are optional leading dim of hidden_states
output = hidden_states.gather(-2, cls_index).squeeze(-2) # shape (bsz, XX, hidden_size)
elif self.summary_type == "attn":
raise NotImplementedError
output = self.first_dropout(output)
output = self.summary(output)
output = self.activation(output)
output = self.last_dropout(output)
return output
def unwrap_model(model: nn.Module, recursive: bool = False) -> nn.Module:
"""
Recursively unwraps a model from potential containers (as used in distributed training).

View File

@ -206,7 +206,7 @@ def convert_audio_spectrogram_transformer_checkpoint(model_name, pytorch_dump_fo
if "speech-commands" in model_name:
# TODO: Convert dataset to Parquet
dataset = load_dataset("google/speech_commands", "v0.02", split="validation", trust_remote_code=True)
dataset = load_dataset("google/speech_commands", "v0.02", split="validation")
waveform = dataset[0]["audio"]["array"]
else:
filepath = hf_hub_download(

View File

@ -245,6 +245,10 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, tuple[Optional[str], Optional[str]]](
("gpt_neox_japanese", ("GPTNeoXJapaneseTokenizer", None)),
("gptj", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
("gptsan-japanese", ("GPTSanJapaneseTokenizer", None)),
("granite", ("GPT2Tokenizer", None)),
("granitemoe", ("GPT2Tokenizer", None)),
("granitemoehybrid", ("GPT2Tokenizer", None)),
("granitemoeshared", ("GPT2Tokenizer", None)),
("grounding-dino", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
("groupvit", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
("helium", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),

View File

@ -266,7 +266,7 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
# Check outputs on an image
if is_semantic:
image_processor = BeitImageProcessor(size=config.image_size, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
else:
image_processor = BeitImageProcessor(

View File

@ -15,7 +15,14 @@
"""English Normalizer class for CLVP."""
import re
import sys
if sys.version_info >= (3, 11):
# Atomic grouping support was only added to the core RE in Python 3.11
import re
else:
import regex as re
class EnglishNormalizer:
@ -199,12 +206,12 @@ class EnglishNormalizer:
This method is used to normalize numbers within a text such as converting the numbers to words, removing
commas, etc.
"""
text = re.sub(re.compile(r"([0-9][0-9\,]+[0-9])"), self._remove_commas, text)
text = re.sub(re.compile(r"£([0-9\,]*[0-9]+)"), r"\1 pounds", text)
text = re.sub(re.compile(r"\$([0-9\.\,]*[0-9]+)"), self._expand_dollars, text)
text = re.sub(re.compile(r"([0-9]+\.[0-9]+)"), self._expand_decimal_point, text)
text = re.sub(re.compile(r"[0-9]+(st|nd|rd|th)"), self._expand_ordinal, text)
text = re.sub(re.compile(r"[0-9]+"), self._expand_number, text)
text = re.sub(r"([0-9][0-9,]+[0-9])", self._remove_commas, text)
text = re.sub(r"£([0-9,]*[0-9])", r"\1 pounds", text)
text = re.sub(r"\$([0-9.,]*[0-9])", self._expand_dollars, text)
text = re.sub(r"([0-9]++\.[0-9]+)", self._expand_decimal_point, text)
text = re.sub(r"[0-9]++(st|nd|rd|th)", self._expand_ordinal, text)
text = re.sub(r"[0-9]+", self._expand_number, text)
return text
def expand_abbreviations(self, text: str) -> str:
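
For reference, a minimal sketch of the behaviour the import guard above relies on: possessive quantifiers such as `[0-9]++` are only supported by the stdlib `re` engine from Python 3.11 onward, so older interpreters fall back to the third-party `regex` package (assumed to be installed here).

```py
import sys

if sys.version_info >= (3, 11):
    import re
else:
    import regex as re  # drop-in superset of `re` with possessive-quantifier support

# Digits matched by `[0-9]++` are consumed possessively and never handed back to the
# engine, avoiding the backtracking that made the original greedy patterns slow on
# adversarial input (the ReDOS fixed in this compare).
print(re.sub(r"[0-9]++(st|nd|rd|th)", "<ordinal>", "meet on the 21st"))  # meet on the <ordinal>
```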

View File

@ -226,7 +226,7 @@ def convert_wav2vec2_checkpoint(
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-lv60")
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
input_audio = [x["array"] for x in ds[:4]["audio"]]
inputs = processor(input_audio, return_tensors="pt", padding=True)

View File

@ -1223,7 +1223,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")
>>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd", split="train")
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]

View File

@ -1601,7 +1601,7 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
>>> model = TFLayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")
>>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd", split="train")
>>> example = dataset[0]
>>> question = "what's his name?"
>>> words = example["words"]

View File

@ -763,9 +763,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
>>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
>>> image_path = dataset["test"][0]["file"]
>>> image = Image.open(image_path).convert("RGB")
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
>>> image = dataset["test"][0]["image"]
>>> encoding = processor(image, return_tensors="pt")
@ -953,7 +952,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
>>> set_seed(0)
>>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True)
>>> data = next(iter(dataset))
>>> image = data["image"].convert("RGB")
@ -1155,7 +1154,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
>>> set_seed(0)
>>> datasets = load_dataset("nielsr/funsd", split="test", trust_remote_code=True)
>>> datasets = load_dataset("nielsr/funsd", split="test")
>>> labels = datasets.features["ner_tags"].feature.names
>>> id2label = {v: k for v, k in enumerate(labels)}
@ -1312,9 +1311,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
>>> image_path = dataset["test"][0]["file"]
>>> image = Image.open(image_path).convert("RGB")
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
>>> image = dataset["test"][0]["image"]
>>> question = "When is coffee break?"
>>> encoding = processor(image, question, return_tensors="pt")

View File

@ -746,7 +746,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -961,7 +961,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -1062,7 +1062,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> question = "what's his name?"
@ -1182,7 +1182,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]

View File

@ -1296,7 +1296,7 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -1439,7 +1439,7 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -1566,7 +1566,7 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -1703,7 +1703,7 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> question = "what's his name?"

View File

@ -653,7 +653,7 @@ class LiltModel(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]
@ -793,7 +793,7 @@ class LiltForSequenceClassification(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]
@ -908,7 +908,7 @@ class LiltForTokenClassification(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]
@ -1025,7 +1025,7 @@ class LiltForQuestionAnswering(LiltPreTrainedModel):
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> words = example["tokens"]
>>> boxes = example["bboxes"]

View File

@ -2228,7 +2228,7 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel, GenerationMixin):
>>> from datasets import load_dataset
>>> dataset = load_dataset(
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
... ) # doctest: +IGNORE_RESULT
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -2909,7 +2909,7 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
>>> import torch
>>> dataset = load_dataset(
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
... ) # doctest: +IGNORE_RESULT
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

View File

@ -1604,7 +1604,7 @@ class UdopModel(UdopPreTrainedModel):
>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -1813,7 +1813,7 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin):
>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -2025,7 +2025,7 @@ class UdopEncoderModel(UdopPreTrainedModel):
>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]

View File

@ -590,7 +590,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
>>> # load first sample of English common_voice
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
>>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> dataset_iter = iter(dataset)
>>> sample = next(dataset_iter)

View File

@ -546,7 +546,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
>>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")
>>> # load first sample of English common_voice
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
>>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> dataset_iter = iter(dataset)
>>> sample = next(dataset_iter)

View File

@ -1670,7 +1670,7 @@ FLAX_WHISPER_AUDIO_CLASSIFICATION_DOCSTRING = r"""
>>> model = FlaxWhisperForAudioClassification.from_pretrained(
... "sanchit-gandhi/whisper-medium-fleurs-lang-id", from_pt=True
... )
>>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True, trust_remote_code=True)
>>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True)
>>> sample = next(iter(ds))

View File

@ -526,12 +526,15 @@ class Trainer:
if is_liger_kernel_available():
from liger_kernel.transformers import _apply_liger_kernel_to_instance
# Prepare kernel config - use provided config or default (empty dict for default behavior)
kernel_config = self.args.liger_kernel_config if self.args.liger_kernel_config is not None else {}
if isinstance(model, PreTrainedModel):
# Patch the model with liger kernels. Use the default kernel configurations.
_apply_liger_kernel_to_instance(model=model)
# Patch the model with liger kernels. Use the specified or default kernel configurations.
_apply_liger_kernel_to_instance(model=model, **kernel_config)
elif hasattr(model, "get_base_model") and isinstance(model.get_base_model(), PreTrainedModel):
# Patch the base model with liger kernels where model is a PeftModel. Use the default kernel configurations.
_apply_liger_kernel_to_instance(model=model.get_base_model())
# Patch the base model with liger kernels where model is a PeftModel. Use the specified or default kernel configurations.
_apply_liger_kernel_to_instance(model=model.get_base_model(), **kernel_config)
else:
logger.warning(
"The model is not an instance of PreTrainedModel. No liger kernels will be applied."

View File

@ -793,6 +793,11 @@ class TrainingArguments:
It can effectively increase multi-GPU training throughput by ~20% and reduces memory usage by ~60%, works out of the box with
flash attention, PyTorch FSDP, and Microsoft DeepSpeed. Currently, it supports llama, mistral, mixtral and gemma models.
liger_kernel_config (`Optional[dict]`, *optional*):
Configuration to be used for Liger Kernel. When use_liger_kernel=True, this dict is passed as keyword arguments to the
`_apply_liger_kernel_to_instance` function, which specifies which kernels to apply. Available options vary by model but typically
include: 'rope', 'swiglu', 'cross_entropy', 'fused_linear_cross_entropy', 'rms_norm', etc. If `None`, use the default kernel configurations.
average_tokens_across_devices (`bool`, *optional*, defaults to `False`):
Whether or not to average tokens across devices. If enabled, will use all_reduce to synchronize
num_tokens_in_batch for precise loss calculation. Reference:
@ -1525,6 +1530,19 @@ class TrainingArguments:
metadata={"help": "Whether or not to enable the Liger Kernel for model training."},
)
liger_kernel_config: Optional[dict[str, bool]] = field(
default=None,
metadata={
"help": (
"Configuration to be used for Liger Kernel. When use_liger_kernel=True, "
"this dict is passed as keyword arguments to the `_apply_liger_kernel_to_instance` function, "
"which specifies which kernels to apply. Available options vary by model "
"but typically include: 'rope', 'swiglu', 'cross_entropy', 'fused_linear_cross_entropy', "
"'rms_norm', etc. If None, use the default kernel configurations."
)
},
)
eval_use_gather_object: Optional[bool] = field(
default=False,
metadata={

View File
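For illustration, a hedged end-to-end sketch of the new argument, mirroring the integration test further down (the output dir is a placeholder, and `model`/`train_dataset` are assumed to be defined; kernels not listed in the dict keep Liger's defaults):

```python
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="out",  # placeholder
    use_liger_kernel=True,
    # Selectively toggle kernels; unlisted keys fall back to the default configuration.
    liger_kernel_config={"rms_norm": False, "cross_entropy": True, "fused_linear_cross_entropy": False},
)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)  # model/train_dataset assumed defined
```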

@ -423,7 +423,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -449,7 +449,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -484,7 +484,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -520,7 +520,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -549,7 +549,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -584,7 +584,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@ -609,7 +609,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@ -1194,7 +1194,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoProcessor, {model_class}
>>> from datasets import load_dataset
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -1219,7 +1219,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import tensorflow as tf
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
@ -1254,7 +1254,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoImageProcessor, {model_class}
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@ -1277,7 +1277,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import tensorflow as tf
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")

View File

@ -269,7 +269,6 @@ def make_task_cmds():
"img_clas": f"""
{scripts_dir}/image-classification/run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--trust_remote_code
--remove_unused_columns False
--max_steps 10
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json

View File

@ -27,8 +27,6 @@ if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers import BeitImageProcessor
if is_torchvision_available():
@ -98,23 +96,14 @@ class BeitImageProcessingTester:
def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])
return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]
def prepare_semantic_batch_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image1 = Image.open(ds[0]["file"])
map1 = Image.open(ds[1]["file"])
image2 = Image.open(ds[2]["file"])
map2 = Image.open(ds[3]["file"])
return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])
@require_torch

View File
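The recurring change in this and the following test files: the rewritten fixture datasets expose decoded PIL images directly under `image`/`map` columns, so the tests no longer need `trust_remote_code` or manual `Image.open(ds[i]["file"])` calls. A minimal sketch of the new access pattern (assuming the fixture layout shown above):

```python
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image, seg_map = ds[0]["image"], ds[0]["map"]  # already-decoded PIL images
images, maps = list(ds["image"][:2]), list(ds["map"][:2])  # batched variant
```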

@ -504,7 +504,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
@ -547,7 +547,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

View File

@ -669,7 +669,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")
return ds[:num_samples]

View File

@ -29,8 +29,6 @@ if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers import DPTImageProcessor
if is_torchvision_available():
@ -94,24 +92,15 @@ class DPTImageProcessingTester:
# Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_single_inputs
def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])
return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]
# Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_batch_inputs
def prepare_semantic_batch_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image1 = Image.open(ds[0]["file"])
map1 = Image.open(ds[1]["file"])
image2 = Image.open(ds[2]["file"])
map2 = Image.open(ds[3]["file"])
return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])
@require_torch

View File

@ -767,7 +767,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
def _load_superb(self, task, num_samples):
from datasets import load_dataset
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")
return ds[:num_samples]

View File

@ -111,13 +111,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
def test_layoutlmv2_integration_test(self):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
for image_processing_class in self.image_processor_list:
# with apply_OCR = True
image_processing = image_processing_class()
image = Image.open(ds[0]["file"]).convert("RGB")
image = ds[0]["image"]
encoding = image_processing(image, return_tensors="pt")

View File

@ -156,7 +156,7 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset
# set up
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
datasets = load_dataset("nielsr/funsd")
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")
def preprocess_data(examples):
@ -192,12 +192,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")
return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]
@cached_property
def get_tokenizers(self):

View File

@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im
if is_pytesseract_available():
from PIL import Image
from transformers import LayoutLMv3ImageProcessor
if is_torchvision_available():
@ -106,13 +104,13 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
def test_LayoutLMv3_integration_test(self):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
# with apply_OCR = True
for image_processing_class in self.image_processor_list:
image_processor = image_processing_class()
image = Image.open(ds[0]["file"]).convert("RGB")
image = ds[0]["image"].convert("RGB")
encoding = image_processor(image, return_tensors="pt")

View File

@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin
if is_pytesseract_available():
from PIL import Image
from transformers import LayoutLMv3ImageProcessor
@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")
return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]
@cached_property
def get_tokenizers(self):

View File

@ -162,7 +162,7 @@ class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset
# set up
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
datasets = load_dataset("nielsr/funsd")
processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False)
def preprocess_data(examples):
@ -200,12 +200,8 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")
return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]
@cached_property
def get_tokenizers(self):

View File

@ -27,8 +27,6 @@ if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers import MobileViTImageProcessor
@ -86,23 +84,14 @@ class MobileViTImageProcessingTester:
def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])
return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]
def prepare_semantic_batch_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image1 = Image.open(dataset[0]["file"])
map1 = Image.open(dataset[1]["file"])
image2 = Image.open(dataset[2]["file"])
map2 = Image.open(dataset[3]["file"])
return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])
@require_torch

View File

@ -86,8 +86,12 @@ class NougatImageProcessingTester:
return self.num_channels, self.size["height"], self.size["width"]
def prepare_dummy_image(self):
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
filepath = hf_hub_download(
repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
repo_id="hf-internal-testing/fixtures_docvqa",
filename="nougat_pdf.png",
repo_type="dataset",
revision=revision,
)
image = Image.open(filepath).convert("RGB")
return image
@ -179,8 +183,12 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual((3, 100, 200), aligned_image.shape)
def prepare_dummy_np_image(self):
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
filepath = hf_hub_download(
repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
repo_id="hf-internal-testing/fixtures_docvqa",
filename="nougat_pdf.png",
repo_type="dataset",
revision=revision,
)
image = Image.open(filepath).convert("RGB")
return np.array(image)

View File

@ -842,11 +842,8 @@ def prepare_img():
# Helper functions for optical flow integration test
def prepare_optical_flow_images():
dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test", trust_remote_code=True)
image1 = Image.open(dataset[0]["file"]).convert("RGB")
image2 = Image.open(dataset[0]["file"]).convert("RGB")
return image1, image2
ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
return list(ds["image"][:2])
def normalize(img):

View File

@ -27,8 +27,6 @@ if is_torch_available():
import torch
if is_vision_available():
from PIL import Image
from transformers import SegformerImageProcessor
@ -86,23 +84,14 @@ class SegformerImageProcessingTester:
def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])
return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]
def prepare_semantic_batch_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image1 = Image.open(dataset[0]["file"])
map1 = Image.open(dataset[1]["file"])
image2 = Image.open(dataset[2]["file"])
map2 = Image.open(dataset[3]["file"])
return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])
@require_torch

View File

@ -184,7 +184,7 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset
# set up
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
datasets = load_dataset("nielsr/funsd")
processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)
def preprocess_data(examples):
@ -222,12 +222,8 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")
return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]
@cached_property
def get_tokenizers(self):

View File

@ -566,7 +566,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")
return ds[:num_samples]

View File

@ -820,7 +820,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")
return ds[:num_samples]

View File

@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):
processor = self.default_processor
dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)
image1 = Image.open(dataset[0]["file"]).convert("RGB")
image2 = Image.open(dataset[1]["file"]).convert("RGB")
dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
image1 = dataset[0]["image"]
image2 = dataset[1]["image"]
text = (
"The left image contains twice the number of dogs as the right image, and at least two dogs in total are"

View File

@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
def test_inference_handwritten(self):
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"]).convert("RGB")
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
image = dataset[0]["image"]
processor = self.default_processor
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@ -1174,8 +1174,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
def test_inference_printed(self):
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
image = Image.open(dataset[1]["file"]).convert("RGB")
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
image = dataset[0]["image"]
processor = self.default_processor
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)

View File

@ -97,9 +97,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
try:
_ = in_queue.get(timeout=timeout)
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
sample = next(iter(ds))
resampled_audio = torchaudio.functional.resample(
@ -1470,7 +1468,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")
return ds[:num_samples]
@ -1836,9 +1834,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
@require_pyctcdecode
@require_torchaudio
def test_wav2vec2_with_lm(self):
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
sample = next(iter(ds))
resampled_audio = torchaudio.functional.resample(
@ -1862,9 +1858,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
@require_pyctcdecode
@require_torchaudio
def test_wav2vec2_with_lm_pool(self):
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
sample = next(iter(ds))
resampled_audio = torchaudio.functional.resample(
@ -1963,9 +1957,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}
def run_model(lang):
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
sample = next(iter(ds))
wav2vec2_lang = LANG_MAP[lang]

View File

@ -463,9 +463,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
def test_word_time_stamp_integration(self):
import torch
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
ds_iter = iter(ds)
sample = next(ds_iter)

View File

@ -473,7 +473,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")
return ds[:num_samples]

View File

@ -1645,9 +1645,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
model.to(torch_device)
ds = load_dataset(
"facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
input_speech = next(iter(ds))["audio"]["array"]
@ -1714,11 +1712,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):
token = os.getenv("HF_HUB_READ_TOKEN", True)
ds = load_dataset(
"mozilla-foundation/common_voice_6_1",
"hf-internal-testing/fixtures_common_voice",
"ja",
split="test",
streaming=True,
trust_remote_code=True,
token=token,
)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))

View File

@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
model = "superb/wav2vec2-base-superb-ks"
audio_classifier = pipeline("audio-classification", model=model)
dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)
dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")
audio = np.array(dataset[3]["speech"], dtype=np.float32)
output = audio_classifier(audio, top_k=4)

View File

@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@require_torch
@require_pyctcdecode
def test_large_model_pt_with_lm(self):
dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
third_item = next(iter(dataset["test"].skip(3)))
filename = third_item["file"]
filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")
speech_recognizer = pipeline(
task="automatic-speech-recognition",
@ -388,7 +386,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
chunk_length_s=8,
stride_length_s=1,
)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
sample = next(iter(data))
res = pipe(sample["audio"]["array"])
@ -434,7 +432,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
stride_length_s=1,
return_language=True,
)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
sample = next(iter(data))
res = pipe(sample["audio"]["array"])
@ -489,7 +487,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
task="automatic-speech-recognition",
model="openai/whisper-tiny.en",
)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
samples = [next(iter(data)) for _ in range(8)]
audio = np.concatenate([sample["audio"]["array"] for sample in samples])
@ -1125,9 +1123,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@slow
def test_speculative_decoding_whisper_non_distil(self):
# Load data:
dataset = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
sample = dataset[0]["audio"]
# Load model:
@ -1169,9 +1165,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@slow
def test_speculative_decoding_whisper_distil(self):
# Load data:
dataset = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
sample = dataset[0]["audio"]
# Load model:

View File

@ -601,7 +601,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
file = image[0]["file"]
outputs = image_segmenter(file, threshold=threshold)
@ -655,7 +655,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
def test_oneformer(self):
image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
file = image[0]["file"]
outputs = image_segmenter(file, threshold=0.99)
# Shortening by hashing

View File

@ -3799,8 +3799,20 @@ class ModelTesterMixin:
self.skipTest(reason="Idefics currently (transformers==4.39.1) requires an image_attention_mask input")
if config.model_type in ["sam"]:
self.skipTest(reason="SAM requires an attention_mask input for relative positional embeddings")
model = model_class(config)
sub_models_supporting_sdpa = [
module._supports_sdpa
for name, module in model.named_modules()
if isinstance(module, PreTrainedModel) and name != ""
]
supports_sdpa_all_modules = (
all(sub_models_supporting_sdpa) if len(sub_models_supporting_sdpa) > 0 else model._supports_sdpa
)
if not supports_sdpa_all_modules:
self.skipTest(reason="This models' submodels does not support sdpa")
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa")
@ -3848,8 +3860,20 @@ class ModelTesterMixin:
"Cannot compile forward without an existing cache with Hybrid, as `torch._dynamo.mark_static_address` "
"is a forbidden call."
)
model = model_class(config)
sub_models_supporting_sdpa = [
module._supports_sdpa
for name, module in model.named_modules()
if isinstance(module, PreTrainedModel) and name != ""
]
supports_sdpa_all_modules = (
all(sub_models_supporting_sdpa) if len(sub_models_supporting_sdpa) > 0 else model._supports_sdpa
)
if not supports_sdpa_all_modules:
self.skipTest(reason="This models' submodels does not support sdpa")
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa")

View File
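The two hunks above add the same submodule scan in two places; as a reading aid, here is a hypothetical helper with identical logic (not part of the change):

```python
from transformers import PreTrainedModel

def supports_sdpa_everywhere(model: PreTrainedModel) -> bool:
    # SDPA can be dispatched only if every nested PreTrainedModel advertises support.
    flags = [
        module._supports_sdpa
        for name, module in model.named_modules()
        if isinstance(module, PreTrainedModel) and name != ""  # name == "" is the root model itself
    ]
    # No nested PreTrainedModels: fall back to the top-level flag.
    return all(flags) if flags else model._supports_sdpa
```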

@ -1792,6 +1792,25 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
self.assertEqual(modeling_llama.apply_rotary_pos_emb, liger_rotary_pos_emb)
self.assertTrue(isinstance(tiny_llama.model.norm, LigerRMSNorm))
@require_liger_kernel
def test_use_liger_kernel_custom_config_patching(self):
# Ensure any monkey patching is cleaned up for subsequent tests
with patch("transformers.models.llama.modeling_llama"):
from liger_kernel.transformers import LigerRMSNorm
config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
tiny_llama = LlamaForCausalLM(config)
args = TrainingArguments(
self.get_auto_remove_tmp_dir(),
use_liger_kernel=True,
liger_kernel_config={"rms_norm": False}, # Don't apply Liger's RMSNorm
)
Trainer(tiny_llama, args)
# Check that the RMSNorm kernel is not applied as specified in the config
self.assertFalse(isinstance(tiny_llama.model.norm, LigerRMSNorm))
@require_liger_kernel
@require_torch_accelerator
def test_use_liger_kernel_trainer(self):
@ -1810,6 +1829,29 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# Check this works
_ = trainer.train()
@require_liger_kernel
@require_torch_accelerator
def test_use_liger_kernel_custom_config_trainer(self):
# Check that trainer still works with liger kernel applied when using a custom config
config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
tiny_llama = LlamaForCausalLM(config)
x = torch.randint(0, 100, (128,))
train_dataset = RepeatDataset(x)
args = TrainingArguments(
self.get_auto_remove_tmp_dir(),
learning_rate=1e-2,
logging_steps=5,
max_steps=20,
use_liger_kernel=True,
liger_kernel_config={"rms_norm": False, "cross_entropy": True, "fused_linear_cross_entropy": False},
)
trainer = Trainer(tiny_llama, args, train_dataset=train_dataset)
# Check this works
_ = trainer.train()
@require_lomo
@require_torch_accelerator
def test_lomo(self):

View File

@ -133,10 +133,19 @@ if __name__ == "__main__":
# Assuming there is a topological sort on the dependency mapping: if the file being checked and its dependencies
# are not in the diff, then it is guaranteed to have no differences. If no models are in the diff, then this
# script will do nothing.
models_in_diff = get_models_in_diff()
if not models_in_diff:
console.print("[bold green]No models files or model tests in the diff, skipping modular checks[/bold green]")
exit(0)
current_branch = subprocess.check_output(["git", "branch", "--show-current"], text=True).strip()
if current_branch == "main":
console.print(
"[bold red]You are developing on the main branch. We cannot identify the list of changed files and will have to check all files. This may take a while.[/bold red]"
)
models_in_diff = {file_path.split("/")[-2] for file_path in args.files}
else:
models_in_diff = get_models_in_diff()
if not models_in_diff:
console.print(
"[bold green]No models files or model tests in the diff, skipping modular checks[/bold green]"
)
exit(0)
skipped_models = set()
non_matching_files = 0
@ -149,7 +158,8 @@ if __name__ == "__main__":
skipped_models.add(model_name)
continue
non_matching_files += compare_files(modular_file_path, args.fix_and_overwrite)
models_in_diff = get_models_in_diff() # When overwriting, the diff changes
if current_branch != "main":
models_in_diff = get_models_in_diff() # When overwriting, the diff changes
else:
new_ordered_files = []
for modular_file_path in ordered_files:

View File
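On `main` there is no diff to inspect, so the script falls back to deriving model names directly from the file paths it was given, as in the added branch above. For example (hypothetical path):

```python
files = ["src/transformers/models/llama/modular_llama.py"]
models_in_diff = {file_path.split("/")[-2] for file_path in files}
print(models_in_diff)  # {'llama'}
```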

@ -1,59 +0,0 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script is used to get a map containing the information of runners to use in GitHub Actions workflow files.
This is meant to be a temporary file that helps us to switch progressively from T4 to A10 runners.
The data is stored in a Hub repository [hf-internal-testing/transformers_daily_ci](https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/blob/main/runner_map.json).
Currently, in that file, we specify the models for which we want to run the tests with T4 runners, to avoid many test failures showing up in the CI reports.
We will work on the tests so that A10 runners can be used for all CI jobs.
"""
import os
import requests
if __name__ == "__main__":
# T4
t4_runners = {
"single-gpu": "aws-g4dn-4xlarge-cache",
"multi-gpu": "aws-g4dn-12xlarge-cache",
}
# A10
a10_runners = {
"single-gpu": "aws-g5-4xlarge-cache",
"multi-gpu": "aws-g5-12xlarge-cache",
}
tests = os.getcwd()
model_tests = os.listdir(os.path.join(tests, "models"))
d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))
d1.remove("models")
d = d2 + d1
response = requests.get("https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/resolve/main/runner_map.json")
# The models that we want to run with T4 runners
runner_map = response.json()
for key in d:
if key in runner_map:
runner_map[key] = t4_runners
else:
runner_map[key] = a10_runners
print(runner_map)

View File

@ -1494,7 +1494,7 @@ if __name__ == "__main__":
other_ci_artifacts=other_ci_artifacts,
)
# # send report only if there is any failure (for push CI)
# if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):
# message.post()
# message.post_reply()
# send report only if there is any failure (for push CI)
if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):
message.post()
message.post_reply()

View File

@ -62,5 +62,4 @@ if __name__ == "__main__":
start = end
end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
model_splits.append(d[start:end])
model_splits = [['models/vit', 'generation'], ['models/clip', 'models/vits']]
print(model_splits)