Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-22 02:08:58 +08:00)

Compare commits: allow_ci_to_use_a10...trigger-remove-script-datasets-in-tests (21 commits)
Commit SHAs:
6d38d27ef3, 20c0f8bc77, 9b2afaf02d, d188134b95, e2ed15c465, 005459827e, 69419a4935, 1fdb9f3908, 3dfebf2fc0, e6093deb18, b7ec09c2f4, aa42987c1e, 38a9b70786, 9bcdd5cde9, 31d30b7224, 0725cd6953, 797860c68c, 89b35be618, 9a02e7602d, 54a02160eb, af6120b3eb
.github/workflows/model_jobs.yml (vendored, 6 changed lines)

@@ -12,8 +12,8 @@ on:
      slice_id:
        required: true
        type: number
      runner_map:
        required: false
      runner:
        required: true
        type: string
      docker:
        required: true

@@ -45,7 +45,7 @@ jobs:
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
    runs-on:
      group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
      group: '${{ inputs.machine_type }}'
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
.github/workflows/model_jobs_amd.yml (vendored, new file, 128 lines)

@@ -0,0 +1,128 @@
name: model jobs

on:
  workflow_call:
    inputs:
      folder_slices:
        required: true
        type: string
      machine_type:
        required: true
        type: string
      slice_id:
        required: true
        type: number
      runner:
        required: true
        type: string
      docker:
        required: true
        type: string

env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: yes
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  RUN_SLOW: yes
  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
  CUDA_VISIBLE_DEVICES: 0,1

jobs:
  run_models_gpu:
    name: " "
    strategy:
      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
      fail-fast: false
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
    runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
    container:
      image: ${{ inputs.docker }}
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
          echo "${{ inputs.folder_slices }}"
          echo "${{ matrix.folders }}"
          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"

      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
        # set the artifact folder names (because the character `/` is not allowed).
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV

      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: Update / Install some packages (for Past CI)
        if: ${{ contains(inputs.docker, '-past-') }}
        working-directory: /transformers
        run: |
          python3 -m pip install -U datasets

      - name: Update / Install some packages (for Past CI)
        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
        working-directory: /transformers
        run: |
          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate

      - name: ROCM-SMI
        run: |
          rocm-smi

      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14

      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all tests on GPU
        working-directory: /transformers
        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      - name: Run test
        shell: bash
        run: |
          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"

      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
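The `folder_slices` input above is a JSON-encoded list of slices (one list of test folders per `slice_id`), produced by `utils/split_model_tests.py` in the calling workflow. A minimal sketch of building such a value, with illustrative folder names rather than the script's real output:

```py
import json

# Hypothetical folder list; the real one is computed by utils/split_model_tests.py
folders = ["models/bert", "models/gpt2", "models/llama", "models/t5"]
num_slices = 2

# One sub-list per slice_id, JSON-encoded so the workflow can index it with fromJson()
folder_slices = [folders[i::num_slices] for i in range(num_slices)]
print(json.dumps(folder_slices))  # [["models/bert", "models/llama"], ["models/gpt2", "models/t5"]]
```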
.github/workflows/self-scheduled-caller.yml (vendored, 68 changed lines)

@@ -7,7 +7,7 @@ on:
    - cron: "17 2 * * *"
  push:
    branches:
      - allow_ci_to_use_a10
      - trigger-remove-script-datasets-in-tests
  workflow_dispatch:
    inputs:
      prev_workflow_run_id:

@@ -22,10 +22,10 @@ on:
        default: ""


# Used for `push` to easily modify the target workflow runs to compare against
# Used for `push` to easily modiffy the target workflow runs to compare against
env:
  prev_workflow_run_id: ""
  other_workflow_run_id: ""
  other_workflow_run_id: "15770139098"


jobs:

@@ -51,68 +51,8 @@ jobs:
    with:
      job: run_models_gpu
      slack_report_channel: "#transformers-ci-daily-models"
      runner: daily-ci
      docker: huggingface/transformers-all-latest-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
    secrets: inherit
|
||||
|
||||
# torch-pipeline:
|
||||
# name: Torch pipeline CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_pipelines_torch_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-pipeline-torch"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-pytorch-gpu
|
||||
# ci_event: Daily CI
|
||||
# report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
# secrets: inherit
|
||||
#
|
||||
# example-ci:
|
||||
# name: Example CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_examples_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-examples"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-all-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
# secrets: inherit
|
||||
#
|
||||
# trainer-fsdp-ci:
|
||||
# name: Trainer/FSDP CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_trainer_and_fsdp_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-training"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-all-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
# secrets: inherit
|
||||
#
|
||||
# deepspeed-ci:
|
||||
# name: DeepSpeed CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_torch_cuda_extensions_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-training"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# working-directory-prefix: /workspace
|
||||
# report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
# secrets: inherit
|
||||
#
|
||||
# quantization-ci:
|
||||
# name: Quantization CI
|
||||
# uses: ./.github/workflows/self-scheduled.yml
|
||||
# with:
|
||||
# job: run_quantization_torch_gpu
|
||||
# slack_report_channel: "#transformers-ci-daily-quantization"
|
||||
# runner: daily-ci
|
||||
# docker: huggingface/transformers-quantization-latest-gpu
|
||||
# ci_event: Daily CI
|
||||
# report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
# secrets: inherit
|
||||
|
10
.github/workflows/self-scheduled.yml
vendored
10
.github/workflows/self-scheduled.yml
vendored
@ -15,6 +15,9 @@ on:
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
runner:
|
||||
required: true
|
||||
type: string
|
||||
docker:
|
||||
required: true
|
||||
type: string
|
||||
@ -59,7 +62,6 @@ jobs:
|
||||
outputs:
|
||||
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
|
||||
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
|
||||
runner_map: ${{ steps.set-matrix.outputs.runner_map }}
|
||||
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
|
||||
steps:
|
||||
- name: Update clone
|
||||
@ -86,7 +88,6 @@ jobs:
|
||||
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
|
||||
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
|
||||
@ -110,14 +111,14 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
machine_type: [aws-g4dn-4xlarge-cache, aws-g4dn-12xlarge-cache]
|
||||
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
|
||||
uses: ./.github/workflows/model_jobs.yml
|
||||
with:
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
machine_type: ${{ matrix.machine_type }}
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
runner_map: ${{ needs.setup.outputs.runner_map }}
|
||||
runner: ${{ inputs.runner }}
|
||||
docker: ${{ inputs.docker }}
|
||||
secrets: inherit
|
||||
|
||||
@ -135,6 +136,7 @@ jobs:
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
machine_type: ${{ matrix.machine_type }}
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
runner: ${{ inputs.runner }}
|
||||
docker: ${{ inputs.docker }}
|
||||
report_name_prefix: run_trainer_and_fsdp_gpu
|
||||
secrets: inherit
|
||||
|
Makefile (18 changed lines)

@@ -8,13 +8,19 @@ check_dirs := examples tests src utils
exclude_folders := ""

modified_only_fixup:
	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
	@if test -n "$(modified_py_files)"; then \
		echo "Checking/fixing $(modified_py_files)"; \
		ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
		ruff format $(modified_py_files) --exclude $(exclude_folders);\
	@current_branch=$$(git branch --show-current); \
	if [ "$$current_branch" = "main" ]; then \
		echo "On main branch, running 'style' target instead..."; \
		$(MAKE) style; \
	else \
		echo "No library .py files were modified"; \
		modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
		if [ -n "$$modified_py_files" ]; then \
			echo "Checking/fixing files: $${modified_py_files}"; \
			ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
			ruff format $${modified_py_files} --exclude $(exclude_folders); \
		else \
			echo "No library .py files were modified"; \
		fi; \
	fi
# Update src/transformers/dependency_versions_table.py
@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:

```python
>>> # let's load an audio sample from an Arabic speech corpus
>>> from datasets import load_dataset
>>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
>>> audio_sample = next(iter(dataset))["audio"]

>>> # now, process it

@@ -56,7 +56,7 @@ Here is how to use the processor to process text and audio:

```python
>>> # let's load an audio sample from an Arabic speech corpus
>>> from datasets import load_dataset
>>> dataset = load_dataset("arabic_speech_corpus", split="test", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("halabi2016/arabic_speech_corpus", split="test", streaming=True)
>>> audio_sample = next(iter(dataset))["audio"]

>>> # now, process it
@@ -493,6 +493,33 @@ training_args = TrainingArguments(
)
```

You can also configure which specific kernels to apply using the `liger_kernel_config` parameter. This dict is passed as keyword arguments to the `_apply_liger_kernel_to_instance` function, allowing fine-grained control over kernel usage. Available options vary by model but typically include: `rope`, `swiglu`, `cross_entropy`, `fused_linear_cross_entropy`, `rms_norm`, etc.

```py
from transformers import TrainingArguments

# Apply only specific kernels
training_args = TrainingArguments(
    output_dir="your-model",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=True,
    use_liger_kernel=True,
    liger_kernel_config={
        "rope": True,
        "cross_entropy": True,
        "rms_norm": False,  # Don't apply Liger's RMSNorm kernel
        "swiglu": True,
    }
)
```
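For reference, the `Trainer` change included in this comparison builds a `kernel_config` dict from `args.liger_kernel_config` before patching the model. A minimal sketch of how that dict could reach Liger's patcher is below; the keyword-expansion call is an assumption inferred from the visible `kernel_config` line, not a verbatim copy of the Trainer source.

```py
from liger_kernel.transformers import _apply_liger_kernel_to_instance

# Sketch only: `model` stands for an already-loaded PreTrainedModel and
# `training_args` for the TrainingArguments defined above.
kernel_config = training_args.liger_kernel_config if training_args.liger_kernel_config is not None else {}
_apply_liger_kernel_to_instance(model=model, **kernel_config)  # assumed forwarding of the config dict
```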
### NEFTune

[NEFTune](https://hf.co/papers/2310.05914) adds noise to the embedding vectors during training to improve model performance. Enable it in [`Trainer`] with the `neftune_noise_alpha` parameter in [`TrainingArguments`] to control how much noise is added.
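A minimal sketch of turning it on (the alpha value is an illustrative choice, not a recommendation from this page):

```py
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="your-model",
    neftune_noise_alpha=0.1,  # scale of the noise added to embedding vectors during training
)
```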
|
@ -264,7 +264,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--dataset_config clean
|
||||
--train_split_name validation
|
||||
--eval_split_name validation
|
||||
--trust_remote_code
|
||||
--output_dir {tmp_dir}
|
||||
--overwrite_output_dir
|
||||
--num_train_epochs=2
|
||||
|
@ -312,7 +312,6 @@ class ExamplesTestsNoTrainer(TestCasePlus):
|
||||
{self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
|
||||
--model_name_or_path google/vit-base-patch16-224-in21k
|
||||
--dataset_name hf-internal-testing/cats_vs_dogs_sample
|
||||
--trust_remote_code
|
||||
--learning_rate 1e-4
|
||||
--per_device_train_batch_size 2
|
||||
--per_device_eval_batch_size 1
|
||||
|
@ -390,7 +390,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--output_dir {tmp_dir}
|
||||
--model_name_or_path google/vit-base-patch16-224-in21k
|
||||
--dataset_name hf-internal-testing/cats_vs_dogs_sample
|
||||
--trust_remote_code
|
||||
--do_train
|
||||
--do_eval
|
||||
--learning_rate 1e-4
|
||||
@ -424,7 +423,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--dataset_config_name clean
|
||||
--train_split_name validation
|
||||
--eval_split_name validation
|
||||
--trust_remote_code
|
||||
--do_train
|
||||
--do_eval
|
||||
--learning_rate 1e-4
|
||||
@ -455,7 +453,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--dataset_config_name clean
|
||||
--train_split_name validation
|
||||
--eval_split_name validation
|
||||
--trust_remote_code
|
||||
--do_train
|
||||
--do_eval
|
||||
--learning_rate 1e-4
|
||||
@ -488,7 +485,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--dataset_config_name clean
|
||||
--train_split_name validation
|
||||
--eval_split_name validation
|
||||
--trust_remote_code
|
||||
--do_train
|
||||
--do_eval
|
||||
--learning_rate 1e-4
|
||||
@ -516,7 +512,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--output_dir {tmp_dir}
|
||||
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
|
||||
--dataset_name anton-l/superb_demo
|
||||
--trust_remote_code
|
||||
--dataset_config_name ks
|
||||
--train_split_name test
|
||||
--eval_split_name test
|
||||
@ -551,7 +546,6 @@ class ExamplesTests(TestCasePlus):
|
||||
--dataset_name hf-internal-testing/librispeech_asr_dummy
|
||||
--dataset_config_names clean
|
||||
--dataset_split_names validation
|
||||
--trust_remote_code
|
||||
--learning_rate 1e-4
|
||||
--per_device_train_batch_size 4
|
||||
--per_device_eval_batch_size 4
|
||||
@ -572,7 +566,6 @@ class ExamplesTests(TestCasePlus):
|
||||
run_mae.py
|
||||
--output_dir {tmp_dir}
|
||||
--dataset_name hf-internal-testing/cats_vs_dogs_sample
|
||||
--trust_remote_code
|
||||
--do_train
|
||||
--do_eval
|
||||
--learning_rate 1e-4
|
||||
|
@ -315,7 +315,6 @@ class ExamplesTests(TestCasePlus):
|
||||
testargs = f"""
|
||||
run_image_classification.py
|
||||
--dataset_name hf-internal-testing/cats_vs_dogs_sample
|
||||
--trust_remote_code
|
||||
--model_name_or_path microsoft/resnet-18
|
||||
--do_train
|
||||
--do_eval
|
||||
|
@ -29,7 +29,6 @@ import warnings
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from functools import partial, wraps
|
||||
from threading import Thread
|
||||
@ -41,7 +40,6 @@ from huggingface_hub import split_torch_state_dict_into_shards
|
||||
from packaging import version
|
||||
from torch import Tensor, nn
|
||||
from torch.distributions import constraints
|
||||
from torch.nn import CrossEntropyLoss, Identity
|
||||
from torch.utils.checkpoint import checkpoint
|
||||
|
||||
from transformers.utils import is_torchao_available
|
||||
@ -50,7 +48,6 @@ from transformers.utils import is_torchao_available
|
||||
if is_torchao_available():
|
||||
from torchao.quantization import Int4WeightOnlyConfig
|
||||
|
||||
from .activations import get_activation
|
||||
from .configuration_utils import PretrainedConfig
|
||||
from .dynamic_module_utils import custom_object_save
|
||||
from .generation import CompileConfig, GenerationConfig
|
||||
@ -98,7 +95,6 @@ from .utils import (
|
||||
WEIGHTS_INDEX_NAME,
|
||||
WEIGHTS_NAME,
|
||||
ContextManagers,
|
||||
ModelOutput,
|
||||
PushToHubMixin,
|
||||
cached_file,
|
||||
check_torch_load_is_safe,
|
||||
@ -123,7 +119,6 @@ from .utils import (
|
||||
is_torch_xla_available,
|
||||
is_torch_xpu_available,
|
||||
logging,
|
||||
replace_return_docstrings,
|
||||
strtobool,
|
||||
)
|
||||
from .utils.generic import GeneralInterface
|
||||
@ -5624,453 +5619,6 @@ if PreTrainedModel.push_to_hub.__doc__ is not None:
|
||||
)
|
||||
|
||||
|
||||
class PoolerStartLogits(nn.Module):
|
||||
"""
|
||||
Compute SQuAD start logits from sequence hidden states.
|
||||
|
||||
Args:
|
||||
config ([`PretrainedConfig`]):
|
||||
The config used by the model, will be used to grab the `hidden_size` of the model.
|
||||
"""
|
||||
|
||||
def __init__(self, config: PretrainedConfig):
|
||||
super().__init__()
|
||||
self.dense = nn.Linear(config.hidden_size, 1)
|
||||
logger.warning_once(
|
||||
"[DEPRECATION WARNING] `PoolerStartLogits` is deprecated and will be removed in v4.53. "
|
||||
"Please use model-specific class, e.g. `XLMPoolerStartLogits`."
|
||||
)
|
||||
|
||||
def forward(
|
||||
self, hidden_states: torch.FloatTensor, p_mask: Optional[torch.FloatTensor] = None
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
|
||||
The final hidden states of the model.
|
||||
p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
|
||||
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
|
||||
should be masked.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: The start logits for SQuAD.
|
||||
"""
|
||||
x = self.dense(hidden_states).squeeze(-1)
|
||||
|
||||
if p_mask is not None:
|
||||
if get_parameter_dtype(self) == torch.float16:
|
||||
x = x * (1 - p_mask) - 65500 * p_mask
|
||||
else:
|
||||
x = x * (1 - p_mask) - 1e30 * p_mask
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class PoolerEndLogits(nn.Module):
|
||||
"""
|
||||
Compute SQuAD end logits from sequence hidden states.
|
||||
|
||||
Args:
|
||||
config ([`PretrainedConfig`]):
|
||||
The config used by the model, will be used to grab the `hidden_size` of the model and the `layer_norm_eps`
|
||||
to use.
|
||||
"""
|
||||
|
||||
def __init__(self, config: PretrainedConfig):
|
||||
super().__init__()
|
||||
self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
|
||||
self.activation = nn.Tanh()
|
||||
self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
|
||||
self.dense_1 = nn.Linear(config.hidden_size, 1)
|
||||
logger.warning_once(
|
||||
"[DEPRECATION WARNING] `PoolerEndLogits` is deprecated and will be removed in v4.53. "
|
||||
"Please use model-specific class, e.g. `XLMPoolerEndLogits`."
|
||||
)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
hidden_states: torch.FloatTensor,
|
||||
start_states: Optional[torch.FloatTensor] = None,
|
||||
start_positions: Optional[torch.LongTensor] = None,
|
||||
p_mask: Optional[torch.FloatTensor] = None,
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
|
||||
The final hidden states of the model.
|
||||
start_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*):
|
||||
The hidden states of the first tokens for the labeled span.
|
||||
start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
The position of the first token for the labeled span.
|
||||
p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
|
||||
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
|
||||
should be masked.
|
||||
|
||||
<Tip>
|
||||
|
||||
One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
|
||||
`start_states`.
|
||||
|
||||
</Tip>
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: The end logits for SQuAD.
|
||||
"""
|
||||
assert start_states is not None or start_positions is not None, (
|
||||
"One of start_states, start_positions should be not None"
|
||||
)
|
||||
if start_positions is not None:
|
||||
slen, hsz = hidden_states.shape[-2:]
|
||||
start_positions = start_positions[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz)
|
||||
start_states = hidden_states.gather(-2, start_positions) # shape (bsz, 1, hsz)
|
||||
start_states = start_states.expand(-1, slen, -1) # shape (bsz, slen, hsz)
|
||||
|
||||
x = self.dense_0(torch.cat([hidden_states, start_states], dim=-1))
|
||||
x = self.activation(x)
|
||||
x = self.LayerNorm(x)
|
||||
x = self.dense_1(x).squeeze(-1)
|
||||
|
||||
if p_mask is not None:
|
||||
if get_parameter_dtype(self) == torch.float16:
|
||||
x = x * (1 - p_mask) - 65500 * p_mask
|
||||
else:
|
||||
x = x * (1 - p_mask) - 1e30 * p_mask
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class PoolerAnswerClass(nn.Module):
|
||||
"""
|
||||
Compute SQuAD 2.0 answer class from classification and start tokens hidden states.
|
||||
|
||||
Args:
|
||||
config ([`PretrainedConfig`]):
|
||||
The config used by the model, will be used to grab the `hidden_size` of the model.
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
|
||||
self.activation = nn.Tanh()
|
||||
self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
|
||||
logger.warning_once(
|
||||
"[DEPRECATION WARNING] `PoolerAnswerClass` is deprecated and will be removed in v4.53. "
|
||||
"Please use model-specific class, e.g. `XLMPoolerAnswerClass`."
|
||||
)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
hidden_states: torch.FloatTensor,
|
||||
start_states: Optional[torch.FloatTensor] = None,
|
||||
start_positions: Optional[torch.LongTensor] = None,
|
||||
cls_index: Optional[torch.LongTensor] = None,
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
|
||||
The final hidden states of the model.
|
||||
start_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`, *optional*):
|
||||
The hidden states of the first tokens for the labeled span.
|
||||
start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
The position of the first token for the labeled span.
|
||||
cls_index (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
Position of the CLS token for each sentence in the batch. If `None`, takes the last token.
|
||||
|
||||
<Tip>
|
||||
|
||||
One of `start_states` or `start_positions` should be not `None`. If both are set, `start_positions` overrides
|
||||
`start_states`.
|
||||
|
||||
</Tip>
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: The SQuAD 2.0 answer class.
|
||||
"""
|
||||
# No dependency on end_feature so that we can obtain one single `cls_logits` for each sample.
|
||||
hsz = hidden_states.shape[-1]
|
||||
assert start_states is not None or start_positions is not None, (
|
||||
"One of start_states, start_positions should be not None"
|
||||
)
|
||||
if start_positions is not None:
|
||||
start_positions = start_positions[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz)
|
||||
start_states = hidden_states.gather(-2, start_positions).squeeze(-2) # shape (bsz, hsz)
|
||||
|
||||
if cls_index is not None:
|
||||
cls_index = cls_index[:, None, None].expand(-1, -1, hsz) # shape (bsz, 1, hsz)
|
||||
cls_token_state = hidden_states.gather(-2, cls_index).squeeze(-2) # shape (bsz, hsz)
|
||||
else:
|
||||
cls_token_state = hidden_states[:, -1, :] # shape (bsz, hsz)
|
||||
|
||||
x = self.dense_0(torch.cat([start_states, cls_token_state], dim=-1))
|
||||
x = self.activation(x)
|
||||
x = self.dense_1(x).squeeze(-1)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
@dataclass
|
||||
class SquadHeadOutput(ModelOutput):
|
||||
"""
|
||||
Base class for outputs of question answering models using a [`~modeling_utils.SQuADHead`].
|
||||
|
||||
Args:
|
||||
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned if both `start_positions` and `end_positions` are provided):
|
||||
Classification loss as the sum of start token, end token (and is_impossible if provided) classification
|
||||
losses.
|
||||
start_top_log_probs (`torch.FloatTensor` of shape `(batch_size, config.start_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
|
||||
Log probabilities for the top config.start_n_top start token possibilities (beam-search).
|
||||
start_top_index (`torch.LongTensor` of shape `(batch_size, config.start_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
|
||||
Indices for the top config.start_n_top start token possibilities (beam-search).
|
||||
end_top_log_probs (`torch.FloatTensor` of shape `(batch_size, config.start_n_top * config.end_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
|
||||
Log probabilities for the top `config.start_n_top * config.end_n_top` end token possibilities
|
||||
(beam-search).
|
||||
end_top_index (`torch.LongTensor` of shape `(batch_size, config.start_n_top * config.end_n_top)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
|
||||
Indices for the top `config.start_n_top * config.end_n_top` end token possibilities (beam-search).
|
||||
cls_logits (`torch.FloatTensor` of shape `(batch_size,)`, *optional*, returned if `start_positions` or `end_positions` is not provided):
|
||||
Log probabilities for the `is_impossible` label of the answers.
|
||||
|
||||
"""
|
||||
|
||||
loss: Optional[torch.FloatTensor] = None
|
||||
start_top_log_probs: Optional[torch.FloatTensor] = None
|
||||
start_top_index: Optional[torch.LongTensor] = None
|
||||
end_top_log_probs: Optional[torch.FloatTensor] = None
|
||||
end_top_index: Optional[torch.LongTensor] = None
|
||||
cls_logits: Optional[torch.FloatTensor] = None
|
||||
|
||||
def __post_init__(self):
|
||||
logger.warning_once(
|
||||
"[DEPRECATION WARNING] `SquadHeadOutput` is deprecated and will be removed in v4.53. "
|
||||
"Please use model-specific class, e.g. `XLMSquadHeadOutput`."
|
||||
)
|
||||
|
||||
|
||||
class SQuADHead(nn.Module):
|
||||
r"""
|
||||
A SQuAD head inspired by XLNet.
|
||||
|
||||
Args:
|
||||
config ([`PretrainedConfig`]):
|
||||
The config used by the model, will be used to grab the `hidden_size` of the model and the `layer_norm_eps`
|
||||
to use.
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
self.start_n_top = config.start_n_top
|
||||
self.end_n_top = config.end_n_top
|
||||
|
||||
self.start_logits = PoolerStartLogits(config)
|
||||
self.end_logits = PoolerEndLogits(config)
|
||||
self.answer_class = PoolerAnswerClass(config)
|
||||
|
||||
logger.warning_once(
|
||||
"[DEPRECATION WARNING] `SQuADHead` is deprecated and will be removed in v4.53. "
|
||||
"Please use model-specific class, e.g. `XLMSQuADHead`."
|
||||
)
|
||||
|
||||
@replace_return_docstrings(output_type=SquadHeadOutput, config_class=PretrainedConfig)
|
||||
def forward(
|
||||
self,
|
||||
hidden_states: torch.FloatTensor,
|
||||
start_positions: Optional[torch.LongTensor] = None,
|
||||
end_positions: Optional[torch.LongTensor] = None,
|
||||
cls_index: Optional[torch.LongTensor] = None,
|
||||
is_impossible: Optional[torch.LongTensor] = None,
|
||||
p_mask: Optional[torch.FloatTensor] = None,
|
||||
return_dict: bool = False,
|
||||
) -> Union[SquadHeadOutput, tuple[torch.FloatTensor]]:
|
||||
"""
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor` of shape `(batch_size, seq_len, hidden_size)`):
|
||||
Final hidden states of the model on the sequence tokens.
|
||||
start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
Positions of the first token for the labeled span.
|
||||
end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
Positions of the last token for the labeled span.
|
||||
cls_index (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
Position of the CLS token for each sentence in the batch. If `None`, takes the last token.
|
||||
is_impossible (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
||||
Whether the question has a possible answer in the paragraph or not.
|
||||
p_mask (`torch.FloatTensor` of shape `(batch_size, seq_len)`, *optional*):
|
||||
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
|
||||
should be masked.
|
||||
return_dict (`bool`, *optional*, defaults to `False`):
|
||||
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
|
||||
|
||||
Returns:
|
||||
"""
|
||||
start_logits = self.start_logits(hidden_states, p_mask=p_mask)
|
||||
|
||||
if start_positions is not None and end_positions is not None:
|
||||
# If we are on multi-GPU, let's remove the dimension added by batch splitting
|
||||
for x in (start_positions, end_positions, cls_index, is_impossible):
|
||||
if x is not None and x.dim() > 1:
|
||||
x.squeeze_(-1)
|
||||
|
||||
# during training, compute the end logits based on the ground truth of the start position
|
||||
end_logits = self.end_logits(hidden_states, start_positions=start_positions, p_mask=p_mask)
|
||||
|
||||
loss_fct = CrossEntropyLoss()
|
||||
start_loss = loss_fct(start_logits, start_positions)
|
||||
end_loss = loss_fct(end_logits, end_positions)
|
||||
total_loss = (start_loss + end_loss) / 2
|
||||
|
||||
if cls_index is not None and is_impossible is not None:
|
||||
# Predict answerability from the representation of CLS and START
|
||||
cls_logits = self.answer_class(hidden_states, start_positions=start_positions, cls_index=cls_index)
|
||||
loss_fct_cls = nn.BCEWithLogitsLoss()
|
||||
cls_loss = loss_fct_cls(cls_logits, is_impossible)
|
||||
|
||||
# note(zhiliny): by default multiply the loss by 0.5 so that the scale is comparable to start_loss and end_loss
|
||||
total_loss += cls_loss * 0.5
|
||||
|
||||
return SquadHeadOutput(loss=total_loss) if return_dict else (total_loss,)
|
||||
|
||||
else:
|
||||
# during inference, compute the end logits based on beam search
|
||||
bsz, slen, hsz = hidden_states.size()
|
||||
start_log_probs = nn.functional.softmax(start_logits, dim=-1) # shape (bsz, slen)
|
||||
|
||||
start_top_log_probs, start_top_index = torch.topk(
|
||||
start_log_probs, self.start_n_top, dim=-1
|
||||
) # shape (bsz, start_n_top)
|
||||
start_top_index_exp = start_top_index.unsqueeze(-1).expand(-1, -1, hsz) # shape (bsz, start_n_top, hsz)
|
||||
start_states = torch.gather(hidden_states, -2, start_top_index_exp) # shape (bsz, start_n_top, hsz)
|
||||
start_states = start_states.unsqueeze(1).expand(-1, slen, -1, -1) # shape (bsz, slen, start_n_top, hsz)
|
||||
|
||||
hidden_states_expanded = hidden_states.unsqueeze(2).expand_as(
|
||||
start_states
|
||||
) # shape (bsz, slen, start_n_top, hsz)
|
||||
p_mask = p_mask.unsqueeze(-1) if p_mask is not None else None
|
||||
end_logits = self.end_logits(hidden_states_expanded, start_states=start_states, p_mask=p_mask)
|
||||
end_log_probs = nn.functional.softmax(end_logits, dim=1) # shape (bsz, slen, start_n_top)
|
||||
|
||||
end_top_log_probs, end_top_index = torch.topk(
|
||||
end_log_probs, self.end_n_top, dim=1
|
||||
) # shape (bsz, end_n_top, start_n_top)
|
||||
end_top_log_probs = end_top_log_probs.view(-1, self.start_n_top * self.end_n_top)
|
||||
end_top_index = end_top_index.view(-1, self.start_n_top * self.end_n_top)
|
||||
|
||||
start_states = torch.einsum("blh,bl->bh", hidden_states, start_log_probs)
|
||||
cls_logits = self.answer_class(hidden_states, start_states=start_states, cls_index=cls_index)
|
||||
|
||||
if not return_dict:
|
||||
return (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits)
|
||||
else:
|
||||
return SquadHeadOutput(
|
||||
start_top_log_probs=start_top_log_probs,
|
||||
start_top_index=start_top_index,
|
||||
end_top_log_probs=end_top_log_probs,
|
||||
end_top_index=end_top_index,
|
||||
cls_logits=cls_logits,
|
||||
)
|
||||
|
||||
|
||||
class SequenceSummary(nn.Module):
|
||||
r"""
|
||||
Compute a single vector summary of a sequence hidden states.
|
||||
|
||||
Args:
|
||||
config ([`PretrainedConfig`]):
|
||||
The config used by the model. Relevant arguments in the config class of the model are (refer to the actual
|
||||
config class of your model for the default values it uses):
|
||||
|
||||
- **summary_type** (`str`) -- The method to use to make this summary. Accepted values are:
|
||||
|
||||
- `"last"` -- Take the last token hidden state (like XLNet)
|
||||
- `"first"` -- Take the first token hidden state (like Bert)
|
||||
- `"mean"` -- Take the mean of all tokens hidden states
|
||||
- `"cls_index"` -- Supply a Tensor of classification token position (GPT/GPT-2)
|
||||
- `"attn"` -- Not implemented now, use multi-head attention
|
||||
|
||||
- **summary_use_proj** (`bool`) -- Add a projection after the vector extraction.
|
||||
- **summary_proj_to_labels** (`bool`) -- If `True`, the projection outputs to `config.num_labels` classes
|
||||
(otherwise to `config.hidden_size`).
|
||||
- **summary_activation** (`Optional[str]`) -- Set to `"tanh"` to add a tanh activation to the output,
|
||||
another string or `None` will add no activation.
|
||||
- **summary_first_dropout** (`float`) -- Optional dropout probability before the projection and activation.
|
||||
- **summary_last_dropout** (`float`)-- Optional dropout probability after the projection and activation.
|
||||
"""
|
||||
|
||||
def __init__(self, config: PretrainedConfig):
|
||||
super().__init__()
|
||||
|
||||
self.summary_type = getattr(config, "summary_type", "last")
|
||||
if self.summary_type == "attn":
|
||||
# We should use a standard multi-head attention module with absolute positional embedding for that.
|
||||
# Cf. https://github.com/zihangdai/xlnet/blob/master/modeling.py#L253-L276
|
||||
# We can probably just use the multi-head attention module of PyTorch >=1.1.0
|
||||
raise NotImplementedError
|
||||
|
||||
self.summary = Identity()
|
||||
if hasattr(config, "summary_use_proj") and config.summary_use_proj:
|
||||
if hasattr(config, "summary_proj_to_labels") and config.summary_proj_to_labels and config.num_labels > 0:
|
||||
num_classes = config.num_labels
|
||||
else:
|
||||
num_classes = config.hidden_size
|
||||
self.summary = nn.Linear(config.hidden_size, num_classes)
|
||||
|
||||
activation_string = getattr(config, "summary_activation", None)
|
||||
self.activation: Callable = get_activation(activation_string) if activation_string else Identity()
|
||||
|
||||
self.first_dropout = Identity()
|
||||
if hasattr(config, "summary_first_dropout") and config.summary_first_dropout > 0:
|
||||
self.first_dropout = nn.Dropout(config.summary_first_dropout)
|
||||
|
||||
self.last_dropout = Identity()
|
||||
if hasattr(config, "summary_last_dropout") and config.summary_last_dropout > 0:
|
||||
self.last_dropout = nn.Dropout(config.summary_last_dropout)
|
||||
|
||||
logger.warning_once(
|
||||
"[DEPRECATION WARNING] `SequenceSummary` is deprecated and will be removed in v4.53. "
|
||||
"Please use model-specific class, e.g. `XLMSequenceSummary`."
|
||||
)
|
||||
|
||||
def forward(
|
||||
self, hidden_states: torch.FloatTensor, cls_index: Optional[torch.LongTensor] = None
|
||||
) -> torch.FloatTensor:
|
||||
"""
|
||||
Compute a single vector summary of a sequence hidden states.
|
||||
|
||||
Args:
|
||||
hidden_states (`torch.FloatTensor` of shape `[batch_size, seq_len, hidden_size]`):
|
||||
The hidden states of the last layer.
|
||||
cls_index (`torch.LongTensor` of shape `[batch_size]` or `[batch_size, ...]` where ... are optional leading dimensions of `hidden_states`, *optional*):
|
||||
Used if `summary_type == "cls_index"` and takes the last token of the sequence as classification token.
|
||||
|
||||
Returns:
|
||||
`torch.FloatTensor`: The summary of the sequence hidden states.
|
||||
"""
|
||||
if self.summary_type == "last":
|
||||
output = hidden_states[:, -1]
|
||||
elif self.summary_type == "first":
|
||||
output = hidden_states[:, 0]
|
||||
elif self.summary_type == "mean":
|
||||
output = hidden_states.mean(dim=1)
|
||||
elif self.summary_type == "cls_index":
|
||||
if cls_index is None:
|
||||
cls_index = torch.full_like(
|
||||
hidden_states[..., :1, :],
|
||||
hidden_states.shape[-2] - 1,
|
||||
dtype=torch.long,
|
||||
)
|
||||
else:
|
||||
cls_index = cls_index.unsqueeze(-1).unsqueeze(-1)
|
||||
cls_index = cls_index.expand((-1,) * (cls_index.dim() - 1) + (hidden_states.size(-1),))
|
||||
# shape of cls_index: (bsz, XX, 1, hidden_size) where XX are optional leading dim of hidden_states
|
||||
output = hidden_states.gather(-2, cls_index).squeeze(-2) # shape (bsz, XX, hidden_size)
|
||||
elif self.summary_type == "attn":
|
||||
raise NotImplementedError
|
||||
|
||||
output = self.first_dropout(output)
|
||||
output = self.summary(output)
|
||||
output = self.activation(output)
|
||||
output = self.last_dropout(output)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def unwrap_model(model: nn.Module, recursive: bool = False) -> nn.Module:
|
||||
"""
|
||||
Recursively unwraps a model from potential containers (as used in distributed training).
|
||||
|
@ -206,7 +206,7 @@ def convert_audio_spectrogram_transformer_checkpoint(model_name, pytorch_dump_fo
|
||||
|
||||
if "speech-commands" in model_name:
|
||||
# TODO: Convert dataset to Parquet
|
||||
dataset = load_dataset("google/speech_commands", "v0.02", split="validation", trust_remote_code=True)
|
||||
dataset = load_dataset("google/speech_commands", "v0.02", split="validation")
|
||||
waveform = dataset[0]["audio"]["array"]
|
||||
else:
|
||||
filepath = hf_hub_download(
|
||||
|
@ -245,6 +245,10 @@ TOKENIZER_MAPPING_NAMES = OrderedDict[str, tuple[Optional[str], Optional[str]]](
|
||||
("gpt_neox_japanese", ("GPTNeoXJapaneseTokenizer", None)),
|
||||
("gptj", ("GPT2Tokenizer", "GPT2TokenizerFast" if is_tokenizers_available() else None)),
|
||||
("gptsan-japanese", ("GPTSanJapaneseTokenizer", None)),
|
||||
("granite", ("GPT2Tokenizer", None)),
|
||||
("granitemoe", ("GPT2Tokenizer", None)),
|
||||
("granitemoehybrid", ("GPT2Tokenizer", None)),
|
||||
("granitemoeshared", ("GPT2Tokenizer", None)),
|
||||
("grounding-dino", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
|
||||
("groupvit", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
|
||||
("helium", (None, "PreTrainedTokenizerFast" if is_tokenizers_available() else None)),
|
||||
|
@ -266,7 +266,7 @@ def convert_beit_checkpoint(checkpoint_url, pytorch_dump_folder_path):
|
||||
# Check outputs on an image
|
||||
if is_semantic:
|
||||
image_processor = BeitImageProcessor(size=config.image_size, do_center_crop=False)
|
||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
|
||||
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
|
||||
image = Image.open(ds[0]["file"])
|
||||
else:
|
||||
image_processor = BeitImageProcessor(
|
||||
|
@ -15,7 +15,14 @@
|
||||
|
||||
"""English Normalizer class for CLVP."""
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
# Atomic grouping support was only added to the core RE in Python 3.11
|
||||
import re
|
||||
else:
|
||||
import regex as re
|
||||
|
||||
|
||||
class EnglishNormalizer:
|
||||
@ -199,12 +206,12 @@ class EnglishNormalizer:
|
||||
This method is used to normalize numbers within a text such as converting the numbers to words, removing
|
||||
commas, etc.
|
||||
"""
|
||||
text = re.sub(re.compile(r"([0-9][0-9\,]+[0-9])"), self._remove_commas, text)
|
||||
text = re.sub(re.compile(r"£([0-9\,]*[0-9]+)"), r"\1 pounds", text)
|
||||
text = re.sub(re.compile(r"\$([0-9\.\,]*[0-9]+)"), self._expand_dollars, text)
|
||||
text = re.sub(re.compile(r"([0-9]+\.[0-9]+)"), self._expand_decimal_point, text)
|
||||
text = re.sub(re.compile(r"[0-9]+(st|nd|rd|th)"), self._expand_ordinal, text)
|
||||
text = re.sub(re.compile(r"[0-9]+"), self._expand_number, text)
|
||||
text = re.sub(r"([0-9][0-9,]+[0-9])", self._remove_commas, text)
|
||||
text = re.sub(r"£([0-9,]*[0-9])", r"\1 pounds", text)
|
||||
text = re.sub(r"\$([0-9.,]*[0-9])", self._expand_dollars, text)
|
||||
text = re.sub(r"([0-9]++\.[0-9]+)", self._expand_decimal_point, text)
|
||||
text = re.sub(r"[0-9]++(st|nd|rd|th)", self._expand_ordinal, text)
|
||||
text = re.sub(r"[0-9]+", self._expand_number, text)
|
||||
return text
|
||||
|
||||
def expand_abbreviations(self, text: str) -> str:
|
||||
|
@ -226,7 +226,7 @@ def convert_wav2vec2_checkpoint(
|
||||
|
||||
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-lv60")
|
||||
|
||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
|
||||
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||
input_audio = [x["array"] for x in ds[:4]["audio"]]
|
||||
|
||||
inputs = processor(input_audio, return_tensors="pt", padding=True)
|
||||
|
@ -1223,7 +1223,7 @@ class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
|
||||
>>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> question = "what's his name?"
|
||||
>>> words = example["words"]
|
||||
|
@ -1601,7 +1601,7 @@ class TFLayoutLMForQuestionAnswering(TFLayoutLMPreTrainedModel, TFQuestionAnswer
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
|
||||
>>> model = TFLayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> question = "what's his name?"
|
||||
>>> words = example["words"]
|
||||
|
@ -763,9 +763,8 @@ class LayoutLMv2Model(LayoutLMv2PreTrainedModel):
|
||||
>>> model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
||||
|
||||
|
||||
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
|
||||
>>> image_path = dataset["test"][0]["file"]
|
||||
>>> image = Image.open(image_path).convert("RGB")
|
||||
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
|
||||
>>> image = dataset["test"][0]["image"]
|
||||
|
||||
>>> encoding = processor(image, return_tensors="pt")
|
||||
|
||||
@ -953,7 +952,7 @@ class LayoutLMv2ForSequenceClassification(LayoutLMv2PreTrainedModel):
|
||||
|
||||
>>> set_seed(0)
|
||||
|
||||
>>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True, trust_remote_code=True)
|
||||
>>> dataset = load_dataset("aharley/rvl_cdip", split="train", streaming=True)
|
||||
>>> data = next(iter(dataset))
|
||||
>>> image = data["image"].convert("RGB")
|
||||
|
||||
@ -1155,7 +1154,7 @@ class LayoutLMv2ForTokenClassification(LayoutLMv2PreTrainedModel):
|
||||
|
||||
>>> set_seed(0)
|
||||
|
||||
>>> datasets = load_dataset("nielsr/funsd", split="test", trust_remote_code=True)
|
||||
>>> datasets = load_dataset("nielsr/funsd", split="test")
|
||||
>>> labels = datasets.features["ner_tags"].feature.names
|
||||
>>> id2label = {v: k for v, k in enumerate(labels)}
|
||||
|
||||
@ -1312,9 +1311,8 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
||||
>>> model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")
|
||||
|
||||
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa", trust_remote_code=True)
|
||||
>>> image_path = dataset["test"][0]["file"]
|
||||
>>> image = Image.open(image_path).convert("RGB")
|
||||
>>> dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
|
||||
>>> image = dataset["test"][0]["image"]
|
||||
>>> question = "When is coffee break?"
|
||||
>>> encoding = processor(image, question, return_tensors="pt")
|
||||
|
||||
|
@ -746,7 +746,7 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = AutoModel.from_pretrained("microsoft/layoutlmv3-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> words = example["tokens"]
|
||||
@ -961,7 +961,7 @@ class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel):
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> words = example["tokens"]
|
||||
@ -1062,7 +1062,7 @@ class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel):
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = AutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> question = "what's his name?"
|
||||
@ -1182,7 +1182,7 @@ class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel):
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> words = example["tokens"]
|
||||
|
@ -1296,7 +1296,7 @@ class TFLayoutLMv3Model(TFLayoutLMv3PreTrainedModel):
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> words = example["tokens"]
|
||||
@ -1439,7 +1439,7 @@ class TFLayoutLMv3ForSequenceClassification(TFLayoutLMv3PreTrainedModel, TFSeque
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> words = example["tokens"]
|
||||
@ -1566,7 +1566,7 @@ class TFLayoutLMv3ForTokenClassification(TFLayoutLMv3PreTrainedModel, TFTokenCla
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> words = example["tokens"]
|
||||
@ -1703,7 +1703,7 @@ class TFLayoutLMv3ForQuestionAnswering(TFLayoutLMv3PreTrainedModel, TFQuestionAn
|
||||
>>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
|
||||
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> image = example["image"]
|
||||
>>> question = "what's his name?"
|
||||
|
@ -653,7 +653,7 @@ class LiltModel(LiltPreTrainedModel):
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
>>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> words = example["tokens"]
|
||||
>>> boxes = example["bboxes"]
|
||||
@ -793,7 +793,7 @@ class LiltForSequenceClassification(LiltPreTrainedModel):
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
>>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> words = example["tokens"]
|
||||
>>> boxes = example["bboxes"]
|
||||
@ -908,7 +908,7 @@ class LiltForTokenClassification(LiltPreTrainedModel):
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
>>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> words = example["tokens"]
|
||||
>>> boxes = example["bboxes"]
|
||||
@ -1025,7 +1025,7 @@ class LiltForQuestionAnswering(LiltPreTrainedModel):
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
>>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
|
||||
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
|
||||
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
|
||||
>>> example = dataset[0]
|
||||
>>> words = example["tokens"]
|
||||
>>> boxes = example["bboxes"]
|
||||
|
@ -2228,7 +2228,7 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel, GenerationMixin):
|
||||
>>> from datasets import load_dataset
|
||||
|
||||
>>> dataset = load_dataset(
|
||||
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
|
||||
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
|
||||
... ) # doctest: +IGNORE_RESULT
|
||||
>>> dataset = dataset.sort("id")
|
||||
>>> sampling_rate = dataset.features["audio"].sampling_rate
|
||||
@ -2909,7 +2909,7 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
|
||||
>>> import torch
|
||||
|
||||
>>> dataset = load_dataset(
|
||||
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True
|
||||
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
|
||||
... ) # doctest: +IGNORE_RESULT
|
||||
>>> dataset = dataset.sort("id")
|
||||
>>> sampling_rate = dataset.features["audio"].sampling_rate
|
||||
|
@ -1604,7 +1604,7 @@ class UdopModel(UdopPreTrainedModel):

>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]

@ -1813,7 +1813,7 @@ class UdopForConditionalGeneration(UdopPreTrainedModel, GenerationMixin):

>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]

@ -2025,7 +2025,7 @@ class UdopEncoderModel(UdopPreTrainedModel):

>>> # load an example image, along with the words and coordinates
>>> # which were extracted using an OCR engine
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
>>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
>>> example = dataset[0]
>>> image = example["image"]
>>> words = example["tokens"]
@ -590,7 +590,7 @@ class Wav2Vec2CTCTokenizer(PreTrainedTokenizer):
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")

>>> # load first sample of English common_voice
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
>>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> dataset_iter = iter(dataset)
>>> sample = next(dataset_iter)

@ -546,7 +546,7 @@ class Wav2Vec2ProcessorWithLM(ProcessorMixin):
>>> processor = AutoProcessor.from_pretrained("patrickvonplaten/wav2vec2-base-100h-with-lm")

>>> # load first sample of English common_voice
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True)
>>> dataset = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
>>> dataset = dataset.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> dataset_iter = iter(dataset)
>>> sample = next(dataset_iter)
@ -1670,7 +1670,7 @@ FLAX_WHISPER_AUDIO_CLASSIFICATION_DOCSTRING = r"""
>>> model = FlaxWhisperForAudioClassification.from_pretrained(
... "sanchit-gandhi/whisper-medium-fleurs-lang-id", from_pt=True
... )
>>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True, trust_remote_code=True)
>>> ds = load_dataset("google/fleurs", "all", split="validation", streaming=True)

>>> sample = next(iter(ds))
@ -526,12 +526,15 @@ class Trainer:
if is_liger_kernel_available():
from liger_kernel.transformers import _apply_liger_kernel_to_instance

# Prepare kernel config - use provided config or default (empty dict for default behavior)
kernel_config = self.args.liger_kernel_config if self.args.liger_kernel_config is not None else {}

if isinstance(model, PreTrainedModel):
# Patch the model with liger kernels. Use the default kernel configurations.
_apply_liger_kernel_to_instance(model=model)
# Patch the model with liger kernels. Use the specified or default kernel configurations.
_apply_liger_kernel_to_instance(model=model, **kernel_config)
elif hasattr(model, "get_base_model") and isinstance(model.get_base_model(), PreTrainedModel):
# Patch the base model with liger kernels where model is a PeftModel. Use the default kernel configurations.
_apply_liger_kernel_to_instance(model=model.get_base_model())
# Patch the base model with liger kernels where model is a PeftModel. Use the specified or default kernel configurations.
_apply_liger_kernel_to_instance(model=model.get_base_model(), **kernel_config)
else:
logger.warning(
"The model is not an instance of PreTrainedModel. No liger kernels will be applied."
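Editor's note on the hunk above (not part of the diff): the new `kernel_config` dict is simply unpacked into the existing patching call, so with the default `liger_kernel_config=None` the behaviour is unchanged. A minimal, hypothetical illustration:

    # assuming args.liger_kernel_config == {"rms_norm": False}
    kernel_config = {"rms_norm": False}
    _apply_liger_kernel_to_instance(model=model, **kernel_config)
    # is equivalent to:
    _apply_liger_kernel_to_instance(model=model, rms_norm=False)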
@ -793,6 +793,11 @@ class TrainingArguments:
It can effectively increase multi-GPU training throughput by ~20% and reduces memory usage by ~60%, works out of the box with
flash attention, PyTorch FSDP, and Microsoft DeepSpeed. Currently, it supports llama, mistral, mixtral and gemma models.

liger_kernel_config (`Optional[dict]`, *optional*):
Configuration to be used for Liger Kernel. When use_liger_kernel=True, this dict is passed as keyword arguments to the
`_apply_liger_kernel_to_instance` function, which specifies which kernels to apply. Available options vary by model but typically
include: 'rope', 'swiglu', 'cross_entropy', 'fused_linear_cross_entropy', 'rms_norm', etc. If `None`, use the default kernel configurations.

average_tokens_across_devices (`bool`, *optional*, defaults to `False`):
Whether or not to average tokens across devices. If enabled, will use all_reduce to synchronize
num_tokens_in_batch for precise loss calculation. Reference:
@ -1525,6 +1530,19 @@ class TrainingArguments:
metadata={"help": "Whether or not to enable the Liger Kernel for model training."},
)

liger_kernel_config: Optional[dict[str, bool]] = field(
default=None,
metadata={
"help": (
"Configuration to be used for Liger Kernel. When use_liger_kernel=True, "
"this dict is passed as keyword arguments to the `_apply_liger_kernel_to_instance` function, "
"which specifies which kernels to apply. Available options vary by model "
"but typically include: 'rope', 'swiglu', 'cross_entropy', 'fused_linear_cross_entropy', "
"'rms_norm', etc. If None, use the default kernel configurations."
)
},
)

eval_use_gather_object: Optional[bool] = field(
default=False,
metadata={
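Taken together, the two TrainingArguments hunks above add an opt-in knob for selecting individual Liger kernels. A minimal usage sketch (not part of the diff; the output directory and the chosen kernel flags are placeholders, and which flags exist depends on the model architecture):

    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="liger-demo",
        use_liger_kernel=True,
        # Forwarded as keyword arguments to liger_kernel's `_apply_liger_kernel_to_instance`.
        liger_kernel_config={"rope": True, "rms_norm": False},
    )
    # `args` is then passed to `Trainer(...)` as usual.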
@ -423,7 +423,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -449,7 +449,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -484,7 +484,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -520,7 +520,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -549,7 +549,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -584,7 +584,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset

>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")

@ -609,7 +609,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset

>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")

@ -1194,7 +1194,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoProcessor, {model_class}
>>> from datasets import load_dataset

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -1219,7 +1219,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import tensorflow as tf

>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate

@ -1254,7 +1254,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoImageProcessor, {model_class}
>>> from datasets import load_dataset

>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@ -1277,7 +1277,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import tensorflow as tf
>>> from datasets import load_dataset

>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> dataset = load_dataset("huggingface/cats-image")
>>> image = dataset["test"]["image"][0]

>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
@ -269,7 +269,6 @@ def make_task_cmds():
"img_clas": f"""
{scripts_dir}/image-classification/run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
--trust_remote_code
--remove_unused_columns False
--max_steps 10
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
@ -27,8 +27,6 @@ if is_torch_available():
import torch

if is_vision_available():
from PIL import Image

from transformers import BeitImageProcessor

if is_torchvision_available():

@ -98,23 +96,14 @@ class BeitImageProcessingTester:

def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])

return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]

def prepare_semantic_batch_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image1 = Image.open(ds[0]["file"])
map1 = Image.open(ds[1]["file"])
image2 = Image.open(ds[2]["file"])
map2 = Image.open(ds[3]["file"])

return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch

@ -504,7 +504,7 @@ class BeitModelIntegrationTest(unittest.TestCase):

image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

@ -547,7 +547,7 @@ class BeitModelIntegrationTest(unittest.TestCase):

image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)

ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
image = Image.open(ds[0]["file"])
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
@ -669,7 +669,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]

def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]
@ -29,8 +29,6 @@ if is_torch_available():
import torch

if is_vision_available():
from PIL import Image

from transformers import DPTImageProcessor

if is_torchvision_available():

@ -94,24 +92,15 @@ class DPTImageProcessingTester:

# Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_single_inputs
def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])

return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]

# Copied from transformers.tests.models.beit.test_image_processing_beit.prepare_semantic_batch_inputs
def prepare_semantic_batch_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image1 = Image.open(ds[0]["file"])
map1 = Image.open(ds[1]["file"])
image2 = Image.open(ds[2]["file"])
map2 = Image.open(ds[3]["file"])

return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch
@ -767,7 +767,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
def _load_superb(self, task, num_samples):
from datasets import load_dataset

ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]
@ -111,13 +111,13 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
def test_layoutlmv2_integration_test(self):
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")

for image_processing_class in self.image_processor_list:
# with apply_OCR = True
image_processing = image_processing_class()

image = Image.open(ds[0]["file"]).convert("RGB")
image = ds[0]["image"]

encoding = image_processing(image, return_tensors="pt")

@ -156,7 +156,7 @@ class LayoutLMv2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset

# set up
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
datasets = load_dataset("nielsr/funsd")
processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased", revision="no_ocr")

def preprocess_data(examples):

@ -192,12 +192,8 @@ class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)

image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")

return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]

@cached_property
def get_tokenizers(self):
@ -22,8 +22,6 @@ from ...test_image_processing_common import ImageProcessingTestMixin, prepare_im

if is_pytesseract_available():
from PIL import Image

from transformers import LayoutLMv3ImageProcessor

if is_torchvision_available():

@ -106,13 +104,13 @@ class LayoutLMv3ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
def test_LayoutLMv3_integration_test(self):
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")

# with apply_OCR = True
for image_processing_class in self.image_processor_list:
image_processor = image_processing_class()

image = Image.open(ds[0]["file"]).convert("RGB")
image = ds[0]["image"].convert("RGB")

encoding = image_processor(image, return_tensors="pt")

@ -28,8 +28,6 @@ from ...test_processing_common import ProcessorTesterMixin

if is_pytesseract_available():
from PIL import Image

from transformers import LayoutLMv3ImageProcessor

@ -172,12 +170,8 @@ class LayoutLMv3ProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)

image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")

return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]

@cached_property
def get_tokenizers(self):
@ -162,7 +162,7 @@ class LayoutXLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset

# set up
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
datasets = load_dataset("nielsr/funsd")
processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base", apply_ocr=False)

def preprocess_data(examples):

@ -200,12 +200,8 @@ class LayoutXLMProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)

image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")

return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]

@cached_property
def get_tokenizers(self):
@ -27,8 +27,6 @@ if is_torch_available():
import torch

if is_vision_available():
from PIL import Image

from transformers import MobileViTImageProcessor

@ -86,23 +84,14 @@ class MobileViTImageProcessingTester:

def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])

return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]

def prepare_semantic_batch_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image1 = Image.open(dataset[0]["file"])
map1 = Image.open(dataset[1]["file"])
image2 = Image.open(dataset[2]["file"])
map2 = Image.open(dataset[3]["file"])

return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch
@ -86,8 +86,12 @@ class NougatImageProcessingTester:
return self.num_channels, self.size["height"], self.size["width"]

def prepare_dummy_image(self):
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
filepath = hf_hub_download(
repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
repo_id="hf-internal-testing/fixtures_docvqa",
filename="nougat_pdf.png",
repo_type="dataset",
revision=revision,
)
image = Image.open(filepath).convert("RGB")
return image

@ -179,8 +183,12 @@ class NougatImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
self.assertEqual((3, 100, 200), aligned_image.shape)

def prepare_dummy_np_image(self):
revision = "ec57bf8c8b1653a209c13f6e9ee66b12df0fc2db"
filepath = hf_hub_download(
repo_id="hf-internal-testing/fixtures_docvqa", filename="nougat_pdf.png", repo_type="dataset"
repo_id="hf-internal-testing/fixtures_docvqa",
filename="nougat_pdf.png",
repo_type="dataset",
revision=revision,
)
image = Image.open(filepath).convert("RGB")
return np.array(image)
@ -842,11 +842,8 @@ def prepare_img():

# Helper functions for optical flow integration test
def prepare_optical_flow_images():
dataset = load_dataset("hf-internal-testing/fixtures_sintel", split="test", trust_remote_code=True)
image1 = Image.open(dataset[0]["file"]).convert("RGB")
image2 = Image.open(dataset[0]["file"]).convert("RGB")

return image1, image2
ds = load_dataset("hf-internal-testing/fixtures_sintel", split="test")
return list(ds["image"][:2])

def normalize(img):
@ -27,8 +27,6 @@ if is_torch_available():
import torch

if is_vision_available():
from PIL import Image

from transformers import SegformerImageProcessor

@ -86,23 +84,14 @@ class SegformerImageProcessingTester:

def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])

return image, map
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
example = ds[0]
return example["image"], example["map"]

def prepare_semantic_batch_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)

image1 = Image.open(dataset[0]["file"])
map1 = Image.open(dataset[1]["file"])
image2 = Image.open(dataset[2]["file"])
map2 = Image.open(dataset[3]["file"])

return [image1, image2], [map1, map2]
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
return list(ds["image"][:2]), list(ds["map"][:2])

@require_torch
@ -184,7 +184,7 @@ class UdopProcessorTest(ProcessorTesterMixin, unittest.TestCase):
from datasets import load_dataset

# set up
datasets = load_dataset("nielsr/funsd", trust_remote_code=True)
datasets = load_dataset("nielsr/funsd")
processor = UdopProcessor.from_pretrained("microsoft/udop-large", apply_ocr=False)

def preprocess_data(examples):

@ -222,12 +222,8 @@ class UdopProcessorIntegrationTests(unittest.TestCase):
# we verify our implementation on 2 document images from the DocVQA dataset
from datasets import load_dataset

ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)

image_1 = Image.open(ds[0]["file"]).convert("RGB")
image_2 = Image.open(ds[1]["file"]).convert("RGB")

return image_1, image_2
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
return ds[0]["image"], ds[1]["image"]

@cached_property
def get_tokenizers(self):
@ -566,7 +566,7 @@ class UniSpeechModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]

def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]

@ -820,7 +820,7 @@ class UniSpeechSatModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]

def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]

@ -637,9 +637,9 @@ class ViltModelIntegrationTest(unittest.TestCase):

processor = self.default_processor

dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="test", trust_remote_code=True)
image1 = Image.open(dataset[0]["file"]).convert("RGB")
image2 = Image.open(dataset[1]["file"]).convert("RGB")
dataset = load_dataset("hf-internal-testing/fixtures_nlvr2", split="train")
image1 = dataset[0]["image"]
image2 = dataset[1]["image"]

text = (
"The left image contains twice the number of dogs as the right image, and at least two dogs in total are"

@ -1149,8 +1149,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
def test_inference_handwritten(self):
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten").to(torch_device)

dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"]).convert("RGB")
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="train")
image = dataset[0]["image"]

processor = self.default_processor
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)

@ -1174,8 +1174,8 @@ class TrOCRModelIntegrationTest(unittest.TestCase):
def test_inference_printed(self):
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed").to(torch_device)

dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test", trust_remote_code=True)
image = Image.open(dataset[1]["file"]).convert("RGB")
dataset = load_dataset("hf-internal-testing/fixtures_ocr", split="test")
image = dataset[0]["image"]

processor = self.default_processor
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(torch_device)
@ -97,9 +97,7 @@ def _test_wav2vec2_with_lm_invalid_pool(in_queue, out_queue, timeout):
try:
_ = in_queue.get(timeout=timeout)

ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
sample = next(iter(ds))

resampled_audio = torchaudio.functional.resample(

@ -1470,7 +1468,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]

def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]

@ -1836,9 +1834,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
@require_pyctcdecode
@require_torchaudio
def test_wav2vec2_with_lm(self):
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
sample = next(iter(ds))

resampled_audio = torchaudio.functional.resample(

@ -1862,9 +1858,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
@require_pyctcdecode
@require_torchaudio
def test_wav2vec2_with_lm_pool(self):
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "es", split="test", streaming=True)
sample = next(iter(ds))

resampled_audio = torchaudio.functional.resample(

@ -1963,9 +1957,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
LANG_MAP = {"it": "ita", "es": "spa", "fr": "fra", "en": "eng"}

def run_model(lang):
ds = load_dataset(
"mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", lang, split="test", streaming=True)
sample = next(iter(ds))

wav2vec2_lang = LANG_MAP[lang]
@ -463,9 +463,7 @@ class Wav2Vec2ProcessorWithLMTest(unittest.TestCase):
def test_word_time_stamp_integration(self):
import torch

ds = load_dataset(
"mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True, trust_remote_code=True
)
ds = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="train", streaming=True)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
ds_iter = iter(ds)
sample = next(ds_iter)

@ -473,7 +473,7 @@ class WavLMModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]

def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
ds = load_dataset("anton-l/superb_dummy", task, split="test")

return ds[:num_samples]
@ -1645,9 +1645,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
model.to(torch_device)

ds = load_dataset(
"facebook/multilingual_librispeech", "german", split="test", streaming=True, trust_remote_code=True
)
ds = load_dataset("facebook/multilingual_librispeech", "german", split="test", streaming=True)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))

input_speech = next(iter(ds))["audio"]["array"]

@ -1714,11 +1712,10 @@ class WhisperModelIntegrationTests(unittest.TestCase):

token = os.getenv("HF_HUB_READ_TOKEN", True)
ds = load_dataset(
"mozilla-foundation/common_voice_6_1",
"hf-internal-testing/fixtures_common_voice",
"ja",
split="test",
streaming=True,
trust_remote_code=True,
token=token,
)
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
@ -179,7 +179,7 @@ class AudioClassificationPipelineTests(unittest.TestCase):
model = "superb/wav2vec2-base-superb-ks"

audio_classifier = pipeline("audio-classification", model=model)
dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test", trust_remote_code=True)
dataset = datasets.load_dataset("anton-l/superb_dummy", "ks", split="test")

audio = np.array(dataset[3]["speech"], dtype=np.float32)
output = audio_classifier(audio, top_k=4)

@ -265,9 +265,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@require_torch
@require_pyctcdecode
def test_large_model_pt_with_lm(self):
dataset = load_dataset("Narsil/asr_dummy", streaming=True, trust_remote_code=True)
third_item = next(iter(dataset["test"].skip(3)))
filename = third_item["file"]
filename = hf_hub_download("Narsil/asr_dummy", filename="4.flac", repo_type="dataset")

speech_recognizer = pipeline(
task="automatic-speech-recognition",

@ -388,7 +386,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
chunk_length_s=8,
stride_length_s=1,
)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
sample = next(iter(data))

res = pipe(sample["audio"]["array"])

@ -434,7 +432,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
stride_length_s=1,
return_language=True,
)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
sample = next(iter(data))

res = pipe(sample["audio"]["array"])

@ -489,7 +487,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
task="automatic-speech-recognition",
model="openai/whisper-tiny.en",
)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True, trust_remote_code=True)
data = load_dataset("openslr/librispeech_asr", "clean", split="test", streaming=True)
samples = [next(iter(data)) for _ in range(8)]
audio = np.concatenate([sample["audio"]["array"] for sample in samples])

@ -1125,9 +1123,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@slow
def test_speculative_decoding_whisper_non_distil(self):
# Load data:
dataset = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
sample = dataset[0]["audio"]

# Load model:

@ -1169,9 +1165,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
@slow
def test_speculative_decoding_whisper_distil(self):
# Load data:
dataset = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]", trust_remote_code=True
)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:1]")
sample = dataset[0]["audio"]

# Load model:
@ -601,7 +601,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):

image_segmenter = pipeline("image-segmentation", model=model, image_processor=image_processor)

image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
file = image[0]["file"]
outputs = image_segmenter(file, threshold=threshold)

@ -655,7 +655,7 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
def test_oneformer(self):
image_segmenter = pipeline(model="shi-labs/oneformer_ade20k_swin_tiny")

image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
file = image[0]["file"]
outputs = image_segmenter(file, threshold=0.99)
# Shortening by hashing
@ -3799,8 +3799,20 @@ class ModelTesterMixin:
self.skipTest(reason="Idefics currently (transformers==4.39.1) requires an image_attention_mask input")
if config.model_type in ["sam"]:
self.skipTest(reason="SAM requires an attention_mask input for relative positional embeddings")

model = model_class(config)

sub_models_supporting_sdpa = [
module._supports_sdpa
for name, module in model.named_modules()
if isinstance(module, PreTrainedModel) and name != ""
]
supports_sdpa_all_modules = (
all(sub_models_supporting_sdpa) if len(sub_models_supporting_sdpa) > 0 else model._supports_sdpa
)
if not supports_sdpa_all_modules:
self.skipTest(reason="This models' submodels does not support sdpa")

with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa")

@ -3848,8 +3860,20 @@ class ModelTesterMixin:
"Cannot compile forward without an existing cache with Hybrid, as `torch._dynamo.mark_static_address` "
"is a forbidden call."
)

model = model_class(config)

sub_models_supporting_sdpa = [
module._supports_sdpa
for name, module in model.named_modules()
if isinstance(module, PreTrainedModel) and name != ""
]
supports_sdpa_all_modules = (
all(sub_models_supporting_sdpa) if len(sub_models_supporting_sdpa) > 0 else model._supports_sdpa
)
if not supports_sdpa_all_modules:
self.skipTest(reason="This models' submodels does not support sdpa")

with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa")
@ -1792,6 +1792,25 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
self.assertEqual(modeling_llama.apply_rotary_pos_emb, liger_rotary_pos_emb)
self.assertTrue(isinstance(tiny_llama.model.norm, LigerRMSNorm))

@require_liger_kernel
def test_use_liger_kernel_custom_config_patching(self):
# Ensure any monkey patching is cleaned up for subsequent tests
with patch("transformers.models.llama.modeling_llama"):
from liger_kernel.transformers import LigerRMSNorm

config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
tiny_llama = LlamaForCausalLM(config)

args = TrainingArguments(
self.get_auto_remove_tmp_dir(),
use_liger_kernel=True,
liger_kernel_config={"rms_norm": False}, # Don't apply Liger's RMSNorm
)
Trainer(tiny_llama, args)

# Check that the RMSNorm kernel is not applied as specified in the config
self.assertFalse(isinstance(tiny_llama.model.norm, LigerRMSNorm))

@require_liger_kernel
@require_torch_accelerator
def test_use_liger_kernel_trainer(self):

@ -1810,6 +1829,29 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# Check this works
_ = trainer.train()

@require_liger_kernel
@require_torch_accelerator
def test_use_liger_kernel_custom_config_trainer(self):
# Check that trainer still works with liger kernel applied when using a custom config
config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
tiny_llama = LlamaForCausalLM(config)

x = torch.randint(0, 100, (128,))
train_dataset = RepeatDataset(x)

args = TrainingArguments(
self.get_auto_remove_tmp_dir(),
learning_rate=1e-2,
logging_steps=5,
max_steps=20,
use_liger_kernel=True,
liger_kernel_config={"rms_norm": False, "cross_entropy": True, "fused_linear_cross_entropy": False},
)
trainer = Trainer(tiny_llama, args, train_dataset=train_dataset)

# Check this works
_ = trainer.train()

@require_lomo
@require_torch_accelerator
def test_lomo(self):
@ -133,10 +133,19 @@ if __name__ == "__main__":
# Assuming there is a topological sort on the dependency mapping: if the file being checked and its dependencies
# are not in the diff, then there it is guaranteed to have no differences. If no models are in the diff, then this
# script will do nothing.
models_in_diff = get_models_in_diff()
if not models_in_diff:
console.print("[bold green]No models files or model tests in the diff, skipping modular checks[/bold green]")
exit(0)
current_branch = subprocess.check_output(["git", "branch", "--show-current"], text=True).strip()
if current_branch == "main":
console.print(
"[bold red]You are developing on the main branch. We cannot identify the list of changed files and will have to check all files. This may take a while.[/bold red]"
)
models_in_diff = {file_path.split("/")[-2] for file_path in args.files}
else:
models_in_diff = get_models_in_diff()
if not models_in_diff:
console.print(
"[bold green]No models files or model tests in the diff, skipping modular checks[/bold green]"
)
exit(0)

skipped_models = set()
non_matching_files = 0

@ -149,7 +158,8 @@ if __name__ == "__main__":
skipped_models.add(model_name)
continue
non_matching_files += compare_files(modular_file_path, args.fix_and_overwrite)
models_in_diff = get_models_in_diff() # When overwriting, the diff changes
if current_branch != "main":
models_in_diff = get_models_in_diff() # When overwriting, the diff changes
else:
new_ordered_files = []
for modular_file_path in ordered_files:
@ -1,59 +0,0 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This script is used to get a map containing the information of runners to use in GitHub Actions workflow files.
This is meant to be a temporary file that helps us to switch progressively from T4 to A10 runners.

The data is stored in a Hub repository [hf-internal-testing/transformers_daily_ci](https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/blob/main/runner_map.json).
Currently, in that file, we specify the models for which we want to run the tests with T4 runners to avoid many test failures showing on the CI reports.
We will work on the tests toward to use A10 for all CI jobs.
"""

import os
import requests

if __name__ == "__main__":

# T4
t4_runners = {
"single-gpu": "aws-g4dn-4xlarge-cache",
"multi-gpu": "aws-g4dn-12xlarge-cache",
}

# A10
a10_runners = {
"single-gpu": "aws-g5-4xlarge-cache",
"multi-gpu": "aws-g5-12xlarge-cache",
}

tests = os.getcwd()
model_tests = os.listdir(os.path.join(tests, "models"))
d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))
d1.remove("models")
d = d2 + d1

response = requests.get("https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/resolve/main/runner_map.json")
# The models that we want to run with T4 runners
runner_map = response.json()

for key in d:
if key in runner_map:
runner_map[key] = t4_runners
else:
runner_map[key] = a10_runners

print(runner_map)
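For context on the file removed above (an illustration, not part of the diff): the deleted script built a plain dict keyed by test directory, so the runner map it printed and that is stored on the Hub has roughly the shape below. The model keys are examples; the runner labels come from the t4_runners / a10_runners dicts in the deleted code.

    runner_map = {
        "models/bert": {"single-gpu": "aws-g4dn-4xlarge-cache", "multi-gpu": "aws-g4dn-12xlarge-cache"},  # kept on T4
        "models/vit": {"single-gpu": "aws-g5-4xlarge-cache", "multi-gpu": "aws-g5-12xlarge-cache"},  # moved to A10
    }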
@ -1494,7 +1494,7 @@ if __name__ == "__main__":
other_ci_artifacts=other_ci_artifacts,
)

# # send report only if there is any failure (for push CI)
# if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):
# message.post()
# message.post_reply()
# send report only if there is any failure (for push CI)
if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):
message.post()
message.post_reply()
@ -62,5 +62,4 @@ if __name__ == "__main__":
start = end
end = start + num_jobs_per_splits + (1 if idx < num_jobs % args.num_splits else 0)
model_splits.append(d[start:end])
model_splits = [['models/vit', 'generation'], ['models/clip', 'models/vits']]
print(model_splits)