Compare commits

..

3 Commits

Author SHA1 Message Date
25ce53c6f1 Revert Bamba changes because they're inherited 2025-06-04 16:55:54 +01:00
e5c0f30d9b make fixup 2025-06-04 16:40:37 +01:00
7ba1b8c465 Refactor causal LM tests to inherit from base classes
- Updated 5 models to inherit from CausalLMModelTester and CausalLMModelTest
- Models updated: bamba, biogpt, bloom, codegen, cohere
- Removed redundant methods that are now inherited from base classes
- Added required attributes (base_model_class, causal_lm_class) where needed
- Fixed model-specific issues:
  - CodeGen: Set use_token_type_ids=False to avoid parameter conflicts
  - All models: Removed redundant test methods already in base classes
- All tests passing except bloom's test_bloom_model_past_large_inputs (alibi-specific issue)
2025-06-04 16:40:37 +01:00
173 changed files with 480 additions and 1332 deletions

View File

@ -39,7 +39,7 @@ jobs:
fetch_tests:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-quality:dev
- image: huggingface/transformers-quality
parallelism: 1
steps:
- checkout
@ -91,7 +91,7 @@ jobs:
fetch_all_tests:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-quality:dev
- image: huggingface/transformers-quality
parallelism: 1
steps:
- checkout
@ -140,7 +140,7 @@ jobs:
check_code_quality:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-quality:dev
- image: huggingface/transformers-quality
resource_class: large
environment:
TRANSFORMERS_IS_CI: yes
@ -165,7 +165,7 @@ jobs:
check_repository_consistency:
working_directory: ~/transformers
docker:
- image: huggingface/transformers-consistency:dev
- image: huggingface/transformers-consistency
resource_class: large
environment:
TRANSFORMERS_IS_CI: yes

View File

@ -105,10 +105,12 @@ class CircleCIJob:
else:
# BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
print(os.environ.get("GIT_COMMIT_MESSAGE"))
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
print(f"Using {self.docker_image} docker image")
if self.install_steps is None:
self.install_steps = ["uv venv && uv pip install ."]
self.install_steps.append("uv venv && uv pip install git+https://github.com/ydshieh/pytest.git@8.3.5-ydshieh git+https://github.com/ydshieh/pluggy.git@1.5.0-ydshieh")
if self.pytest_options is None:
self.pytest_options = {}
if isinstance(self.tests_to_run, str):
@ -212,7 +214,7 @@ generate_job = CircleCIJob(
docker_image=[{"image": "huggingface/transformers-torch-light"}],
# networkx==3.3 (after #36957) cause some issues
# TODO: remove this once it works directly
install_steps=["uv venv && uv pip install ."],
install_steps=["uv venv && uv pip install . && uv pip install networkx==3.2.1"],
marker="generate",
parallelism=6,
)
@ -308,7 +310,7 @@ onnx_job = CircleCIJob(
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
install_steps=[
"uv venv",
"uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
"uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
],
pytest_options={"k onnx": None},
pytest_num_workers=1,
@ -337,7 +339,7 @@ non_model_job = CircleCIJob(
docker_image=[{"image": "huggingface/transformers-torch-light"}],
# networkx==3.3 (after #36957) cause some issues
# TODO: remove this once it works directly
install_steps=["uv venv && uv pip install ."],
install_steps=["uv venv && uv pip install . && uv pip install networkx==3.2.1"],
marker="not generate",
parallelism=6,
)

View File

@ -3,7 +3,7 @@ name: Build pr ci-docker
on:
push:
branches:
- try_torch_2.7_on_circleci_jobs_xxx
- push-ci-image # for now let's only build on this branch
repository_dispatch:
workflow_call:
inputs:
@ -22,6 +22,7 @@ jobs:
build:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
strategy:
matrix:
@ -32,8 +33,13 @@ jobs:
-
name: Set tag
run: |
echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
echo "setting it to DEV!"
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
echo "setting it to DEV!"
else
echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV"
fi
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@ -54,5 +60,18 @@ jobs:
build-args: |
REF=${{ github.sha }}
file: "./docker/${{ matrix.file }}.dockerfile"
push: true
push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }}
tags: ${{ env.TAG }}
notify:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
steps:
- name: Post to Slack
if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

View File

@ -5,7 +5,7 @@ ARG REF=main
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --upgrade 'torch==2.6.0' 'torchaudio==2.6.0' 'torchvision==0.21.0' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"

View File

@ -16,7 +16,7 @@ RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
RUN make install -j 10
RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache --upgrade 'torch==2.6.0' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
# spacy is not used, so it is not tested. It causes failures. TODO: fix later

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch==2.6.0' 'torchaudio==2.6.0' 'torchvision==0.21.0' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
RUN uv pip uninstall transformers

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch==2.6.0' 'torchaudio==2.6.0' 'torchvision==0.21.0' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps timm accelerate
RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --upgrade 'torch==2.6.0' 'torchaudio==2.6.0' 'torchvision==0.21.0' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
RUN uv pip uninstall transformers

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --upgrade 'torch==2.6.0' 'torchaudio==2.6.0' 'torchvision==0.21.0' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
RUN uv pip uninstall transformers

View File

@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch==2.6.0' 'torchaudio==2.6.0' 'torchvision==0.21.0' --index-url https://download.pytorch.org/whl/cpu
RUN git lfs install
RUN uv pip install --no-cache-dir pypi-kenlm

View File

@ -14,94 +14,93 @@ rendered properly in your Markdown viewer.
-->
<div style="float: right;">
<div class="flex flex-wrap space-x-1">
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
<img alt="Flax" src="https://img.shields.io/badge/Flax-29a79b.svg?style=flat">
<img alt="FlashAttention" src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E%20FlashAttention-eae0c8?style=flat">
</div>
# GPT Neo
<div class="flex flex-wrap space-x-1">
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
<img alt="Flax" src="https://img.shields.io/badge/Flax-29a79b.svg?style=flat">
<img alt="FlashAttention" src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E%20FlashAttention-eae0c8?style=flat">
</div>
## Overview
The GPTNeo model was released in the [EleutherAI/gpt-neo](https://github.com/EleutherAI/gpt-neo) repository by Sid
Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy. It is a GPT2-like causal language model trained on the
[Pile](https://pile.eleuther.ai/) dataset.
The architecture is similar to GPT2 except that GPT Neo uses local attention in every other layer with a window size of
256 tokens.
This model was contributed by [valhalla](https://huggingface.co/valhalla).
## Usage example
The `generate()` method can be used to generate text using the GPT Neo model.
```python
>>> from transformers import GPTNeoForCausalLM, GPT2Tokenizer
>>> model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
>>> tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
>>> prompt = (
... "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
... "previously unexplored valley, in the Andes Mountains. Even more surprising to the "
... "researchers was the fact that the unicorns spoke perfect English."
... )
>>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids
>>> gen_tokens = model.generate(
... input_ids,
... do_sample=True,
... temperature=0.9,
... max_length=100,
... )
>>> gen_text = tokenizer.batch_decode(gen_tokens)[0]
```
## Combining GPT-Neo and Flash Attention 2
First, make sure to install the latest version of Flash Attention 2, which includes the sliding window attention feature, and make sure your hardware is compatible with Flash Attention 2. More details about the installation are available [here](https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2).
Make sure as well to load your model in half-precision (e.g. `torch.float16`).
To load and run a model using Flash Attention 2, refer to the snippet below:
```python
>>> import torch
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> device = "cuda" # the device to load the model onto
>>> model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B", torch_dtype=torch.float16, attn_implementation="flash_attention_2")
>>> tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
>>> prompt = "def hello_world():"
>>> model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
>>> model.to(device)
>>> generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
>>> tokenizer.batch_decode(generated_ids)[0]
"def hello_world():\n >>> run_script("hello.py")\n >>> exit(0)\n<|endoftext|>"
```
### Expected speedups
Below is an expected speedup diagram that compares pure inference time between the native implementation in transformers using the `EleutherAI/gpt-neo-2.7B` checkpoint and the Flash Attention 2 version of the model.
Note that GPT-Neo cannot be trained or run on very long contexts because the maximum number of [position embeddings](https://huggingface.co/EleutherAI/gpt-neo-2.7B/blob/main/config.json#L58) is limited to 2048. This limit applies to all GPT-Neo models and is not specific to Flash Attention 2.
<div style="text-align: center">
<img src="https://user-images.githubusercontent.com/49240599/272241893-b1c66b75-3a48-4265-bc47-688448568b3d.png">
</div>
## GPT-Neo
## Resources
[GPT-Neo](https://zenodo.org/records/5297715) is an open-source alternative to GPT-2 and GPT-3 models, built with Mesh TensorFlow for TPUs. GPT-Neo uses local attention in every other layer for more efficiency. It is trained on the [Pile](https://huggingface.co/datasets/EleutherAI/pile), a diverse dataset consisting of 22 smaller high-quality datasets.
You can find all the original GPT-Neo checkpoints under the [EleutherAI](https://huggingface.co/EleutherAI?search_models=gpt-neo) organization.
> [!TIP]
> Click on the GPT-Neo models in the right sidebar for more examples of how to apply GPT-Neo to different language tasks.
The example below demonstrates how to generate text with [`Pipeline`], with [`AutoModel`], or from the command line.
<hfoptions id="usage">
<hfoption id="Pipeline">
```py
import torch
from transformers import pipeline
pipeline = pipeline(task="text-generation", model="EleutherAI/gpt-neo-1.3B", torch_dtype=torch.float16, device=0)
pipeline("Hello, I'm a language model")
```
</hfoption>
<hfoption id="AutoModel">
```py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B", torch_dtype=torch.float16, device_map="auto", attn_implementation="flash_attention_2")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
input_ids = tokenizer("Hello, I'm a language model", return_tensors="pt").to("cuda")
output = model.generate(**input_ids)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```
</hfoption>
<hfoption id="transformers CLI">
```bash
echo -e "Hello, I'm a language model" | transformers-cli run --task text-generation --model EleutherAI/gpt-neo-1.3B --device 0
```
</hfoption>
</hfoptions>
Quantization reduces the memory burden of large models by representing the weights in a lower precision. Refer to the [Quantization](../quantization/overview) overview for more available quantization backends.
The example below uses [bitsandbytes](../quantization/bitsandbytes) to quantize only the weights to 4 bits.
```py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype="float16",
bnb_4bit_use_double_quant=True
)
model = AutoModelForCausalLM.from_pretrained(
"EleutherAI/gpt-neo-2.7B",
quantization_config=quantization_config,
device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
inputs = tokenizer("Hello, I'm a language model", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
## Notes
- Pad inputs on the right because GPT-Neo uses absolute position embeddings.
- [Text classification task guide](../tasks/sequence_classification)
- [Causal language modeling task guide](../tasks/language_modeling)
## GPTNeoConfig

View File

@ -119,11 +119,6 @@ Image.fromarray(depth.astype("uint8"))
[[autodoc]] ZoeDepthImageProcessor
- preprocess
## ZoeDepthImageProcessorFast
[[autodoc]] ZoeDepthImageProcessorFast
- preprocess
## ZoeDepthForDepthEstimation
[[autodoc]] ZoeDepthForDepthEstimation

View File

@ -32,8 +32,8 @@ ocalhost:29504 test_train.py
import logging
import os
from collections.abc import Iterable
from contextlib import nullcontext
from typing import Iterable
import torch
import torch.distributed as dist

View File

@ -3,6 +3,6 @@ jaxlib>=0.1.59
flax>=0.3.5
optax>=0.0.8
-f https://download.pytorch.org/whl/torch_stable.html
torch==2.7.1
torch==2.6.0
-f https://download.pytorch.org/whl/torch_stable.html
torchvision==0.12.0+cpu

View File

@ -31,9 +31,8 @@ ocalhost:29504 test_train.py
import logging
import os
from collections.abc import Iterable
from contextlib import nullcontext
from typing import Dict, Optional
from typing import Dict, Iterable, Optional
import torch
import torch.distributed as dist

View File

@ -142,7 +142,6 @@ _deps = [
"optimum-benchmark>=0.3.0",
"optuna",
"optax>=0.0.8,<=0.1.4",
"pandas<2.3.0", # `datasets` requires `pandas` while `pandas==2.3.0` has issues with CircleCI on 2025/06/05
"packaging>=20.0",
"parameterized",
"phonemizer",
@ -368,7 +367,7 @@ extras["testing"] = (
extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"] + extras["sentencepiece"]
extras["ruff"] = deps_list("ruff")
extras["quality"] = deps_list("datasets", "isort", "ruff", "GitPython", "urllib3", "libcst", "rich", "pandas")
extras["quality"] = deps_list("datasets", "isort", "ruff", "GitPython", "urllib3", "libcst", "rich")
extras["all"] = (
extras["tf"]

View File

@ -2,9 +2,8 @@ import copy
import importlib.metadata
import json
import os
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import torch
from packaging import version

View File

@ -21,8 +21,7 @@ from dataclasses import dataclass
from datetime import date
from itertools import chain
from pathlib import Path
from re import Pattern
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, Optional, Pattern, Tuple, Union
import yaml

View File

@ -824,7 +824,7 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
tokenizer: PreTrainedTokenizerBase
mlm: bool = True
mlm_probability: Optional[float] = 0.15
mlm_probability: float = 0.15
mask_replace_prob: float = 0.8
random_replace_prob: float = 0.1
pad_to_multiple_of: Optional[int] = None
@ -833,15 +833,13 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
seed: Optional[int] = None
def __post_init__(self):
if self.mlm:
if self.tokenizer.mask_token is None:
raise ValueError(
"This tokenizer does not have a mask token which is necessary for masked language modeling. "
"You should pass `mlm=False` to train on causal language modeling instead."
)
if self.mlm_probability is None or self.mlm_probability < 0 or self.mlm_probability > 1:
raise ValueError("mlm_probability should be between 0 and 1.")
self.mlm_probability = float(self.mlm_probability)
if self.mlm and self.tokenizer.mask_token is None:
raise ValueError(
"This tokenizer does not have a mask token which is necessary for masked language modeling. "
"You should pass `mlm=False` to train on causal language modeling instead."
)
if self.mlm_probability < 0 or self.mlm_probability > 1:
raise ValueError("mlm_probability should be between 0 and 1.")
if self.mask_replace_prob + self.random_replace_prob > 1:
raise ValueError("The sum of mask_replace_prob and random_replace_prob should not exceed 1")
if self.mask_replace_prob < 0 or self.mask_replace_prob > 1:
@ -849,6 +847,7 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
if self.random_replace_prob < 0 or self.random_replace_prob > 1:
raise ValueError("random_replace_prob should be between 0 and 1.")
self.mlm_probability = float(self.mlm_probability)
self.mask_replace_prob = float(self.mask_replace_prob)
self.random_replace_prob = float(self.random_replace_prob)

View File

@ -48,7 +48,6 @@ deps = {
"optimum-benchmark": "optimum-benchmark>=0.3.0",
"optuna": "optuna",
"optax": "optax>=0.0.8,<=0.1.4",
"pandas": "pandas<2.3.0",
"packaging": "packaging>=20.0",
"parameterized": "parameterized",
"phonemizer": "phonemizer",

View File

@ -15,8 +15,7 @@
import inspect
import math
from collections.abc import Iterable
from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Callable, Iterable, List, Optional, Tuple, Union
import numpy as np
import torch

View File

@ -16,7 +16,7 @@
"""ALBERT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -18,8 +18,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Iterable
from typing import List, Optional, Tuple, Union
from typing import Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -12,8 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Iterable
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -442,12 +442,8 @@ class _BaseAutoModelClass:
else:
repo_id = config.name_or_path
model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
# This block handles the case where the user is loading a model with `trust_remote_code=True`
# but a library model exists with the same name. We don't want to override the autoclass
# mappings in this case, or all future loads of that model will be the remote code model.
if not has_local_code:
cls.register(config.__class__, model_class, exist_ok=True)
model_class.register_for_auto_class(auto_class=cls)
model_class.register_for_auto_class(auto_class=cls)
cls.register(config.__class__, model_class, exist_ok=True)
_ = kwargs.pop("code_revision", None)
model_class = add_generation_mixin_to_remote_model(model_class)
return model_class._from_config(config, **kwargs)
@ -583,12 +579,8 @@ class _BaseAutoModelClass:
class_ref, pretrained_model_name_or_path, code_revision=code_revision, **hub_kwargs, **kwargs
)
_ = hub_kwargs.pop("code_revision", None)
# This block handles the case where the user is loading a model with `trust_remote_code=True`
# but a library model exists with the same name. We don't want to override the autoclass
# mappings in this case, or all future loads of that model will be the remote code model.
if not has_local_code:
cls.register(config.__class__, model_class, exist_ok=True)
model_class.register_for_auto_class(auto_class=cls)
cls.register(config.__class__, model_class, exist_ok=True)
model_class.register_for_auto_class(auto_class=cls)
model_class = add_generation_mixin_to_remote_model(model_class)
return model_class.from_pretrained(
pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs

View File

@ -170,7 +170,7 @@ else:
("vitmatte", ("VitMatteImageProcessor", "VitMatteImageProcessorFast")),
("xclip", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("yolos", ("YolosImageProcessor", "YolosImageProcessorFast")),
("zoedepth", ("ZoeDepthImageProcessor", "ZoeDepthImageProcessorFast")),
("zoedepth", ("ZoeDepthImageProcessor",)),
]
)

View File

@ -16,8 +16,7 @@
import warnings
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer
from ...configuration_utils import PretrainedConfig

View File

@ -16,7 +16,7 @@
import warnings
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -16,7 +16,7 @@
"""BERT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,7 +15,7 @@
"""BigBird model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,8 +15,7 @@
"""BigBirdPegasus model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer
from ...configuration_utils import PretrainedConfig

View File

@ -15,8 +15,7 @@
"""Blenderbot model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer
from ...configuration_utils import PretrainedConfig

View File

@ -15,8 +15,7 @@
"""BlenderbotSmall model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer
from ...configuration_utils import PretrainedConfig

View File

@ -15,8 +15,7 @@
"""Bloom configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, List, Optional
from typing import TYPE_CHECKING, Any, List, Mapping, Optional
from packaging import version

View File

@ -14,8 +14,7 @@
# limitations under the License.
"""Image processor class for BridgeTower."""
from collections.abc import Iterable
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -14,8 +14,7 @@
# limitations under the License.
"""Fast Image processor class for BridgeTower."""
from collections.abc import Iterable
from typing import Dict, Optional, Tuple, Union
from typing import Dict, Iterable, Optional, Tuple, Union
from ...image_processing_utils_fast import (
BaseImageProcessorFast,

View File

@ -16,7 +16,7 @@
"""CamemBERT configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,8 +15,7 @@
"""Chinese-CLIP model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Mapping, Optional
if TYPE_CHECKING:

View File

@ -15,8 +15,7 @@
"""CLIP model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Mapping, Optional
if TYPE_CHECKING:

View File

@ -15,8 +15,7 @@
"""CodeGen model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, List, Optional
from typing import Any, List, Mapping, Optional
from ... import PreTrainedTokenizer, TensorType, is_torch_available
from ...configuration_utils import PretrainedConfig

View File

@ -15,7 +15,7 @@
"""Conditional DETR model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -17,8 +17,7 @@
import io
import pathlib
from collections import defaultdict
from collections.abc import Iterable
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import numpy as np

View File

@ -15,7 +15,7 @@
"""ConvBERT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,7 +15,7 @@
"""ConvNeXT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -15,7 +15,7 @@
"""Data2VecText configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,7 +15,7 @@
"""Data2VecVision model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -15,8 +15,7 @@
"""DeBERTa model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -17,8 +17,7 @@
from __future__ import annotations
import math
from collections.abc import Sequence
from typing import Dict, Optional, Tuple, Union
from typing import Dict, Optional, Sequence, Tuple, Union
import numpy as np
import tensorflow as tf

View File

@ -15,8 +15,7 @@
"""DeBERTa-v2 model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import TYPE_CHECKING, Any, Mapping, Optional, Union
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -17,8 +17,7 @@
import io
import pathlib
from collections import defaultdict
from collections.abc import Iterable
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import numpy as np

View File

@ -15,7 +15,7 @@
"""DeiT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -15,8 +15,7 @@
"""Image processor class for Deformable DETR."""
import pathlib
from collections.abc import Iterable
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -1,8 +1,7 @@
# Copyright (c) Microsoft Corporation and HuggingFace
# Licensed under the MIT License.
from collections.abc import Mapping
from typing import Any, Dict, List
from typing import Any, Dict, List, Mapping
import numpy as np
import torch

View File

@ -15,8 +15,7 @@
"""PyTorch Graphormer model."""
import math
from collections.abc import Iterable, Iterator
from typing import List, Optional, Tuple, Union
from typing import Iterable, Iterator, List, Optional, Tuple, Union
import torch
import torch.nn as nn

View File

@ -15,7 +15,7 @@
"""MEGA configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ....configuration_utils import PretrainedConfig
from ....onnx import OnnxConfig

View File

@ -15,7 +15,7 @@
"""DETR model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -17,8 +17,7 @@
import io
import pathlib
from collections import defaultdict
from collections.abc import Iterable
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import numpy as np

View File

@ -15,7 +15,7 @@
"""DINOv2 model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -491,7 +491,7 @@ class Dinov2PreTrainedModel(PreTrainedModel):
base_model_prefix = "dinov2"
main_input_name = "pixel_values"
supports_gradient_checkpointing = True
_no_split_modules = ["Dinov2Layer"]
_no_split_modules = ["Dinov2SwiGLUFFN"]
_supports_sdpa = True
_supports_flash_attn_2 = True
_supports_flex_attn = True

View File

@ -509,7 +509,7 @@ class Dinov2WithRegistersPreTrainedModel(PreTrainedModel):
base_model_prefix = "dinov2_with_registers"
main_input_name = "pixel_values"
supports_gradient_checkpointing = True
_no_split_modules = ["Dinov2WithRegistersLayer"]
_no_split_modules = ["Dinov2WithRegistersSwiGLUFFN"]
_supports_sdpa = True
_supports_flash_attn_2 = True
_supports_flex_attn = True

View File

@ -15,7 +15,7 @@
"""DistilBERT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,8 +15,7 @@
"""Image processor class for DPT."""
import math
from collections.abc import Iterable
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
from ...utils.import_utils import requires

View File

@ -15,8 +15,7 @@
"""EfficientNet model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import List
from typing import List, Mapping
from packaging import version

View File

@ -16,7 +16,7 @@
"""ELECTRA model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -15,8 +15,7 @@
# limitations under the License.
import math
from collections.abc import Iterable
from typing import Dict, List, Optional, Union
from typing import Dict, Iterable, List, Optional, Union
import numpy as np

View File

@ -16,7 +16,7 @@
"""ERNIE model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -14,10 +14,9 @@
# limitations under the License.
import math
import sys
from collections.abc import Sequence
from dataclasses import dataclass
from functools import partial
from typing import Callable, Dict, List, Optional, Tuple, Union
from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
import numpy as np
import torch

View File

@ -13,9 +13,8 @@
# limitations under the License.
import logging
import math
from collections.abc import Iterable, Sequence
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
import torch

View File

@ -18,8 +18,7 @@
import dataclasses
import re
import string
from collections.abc import Iterator, Mapping, Sequence
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple
import numpy as np

View File

@ -18,9 +18,8 @@
import collections
import copy
import functools
from collections.abc import Mapping, Sequence
from importlib import resources
from typing import Dict, List, Tuple
from typing import Dict, List, Mapping, Sequence, Tuple
import numpy as np

View File

@ -15,9 +15,8 @@
from __future__ import annotations
from collections.abc import Sequence
from functools import lru_cache
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
import numpy as np
import torch

View File

@ -15,7 +15,7 @@
"""Flaubert configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -16,9 +16,8 @@
import math
import random
from collections.abc import Iterable
from functools import lru_cache
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -16,9 +16,8 @@
import math
import random
from collections.abc import Iterable
from functools import lru_cache
from typing import Any, Dict, Optional, Tuple, Union
from typing import Any, Dict, Iterable, Optional, Tuple, Union
from ...image_processing_utils_fast import (
BaseImageProcessorFast,

View File

@ -16,8 +16,7 @@
"""OpenAI GPT-2 configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, List, Optional
from typing import Any, List, Mapping, Optional
from ... import PreTrainedTokenizer, TensorType, is_torch_available
from ...configuration_utils import PretrainedConfig

View File

@ -15,8 +15,7 @@
"""GPT Neo model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer, TensorType, is_torch_available
from ...configuration_utils import PretrainedConfig

View File

@ -15,8 +15,7 @@
"""GPT-J model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, List, Optional
from typing import Any, List, Mapping, Optional
from ... import PreTrainedTokenizer, TensorType, is_torch_available
from ...configuration_utils import PretrainedConfig

View File

@ -17,8 +17,7 @@
import io
import pathlib
from collections import defaultdict
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
import numpy as np

View File

@ -15,8 +15,7 @@
"""GroupViT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Mapping, Optional
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -17,7 +17,7 @@
"""I-BERT configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -14,8 +14,7 @@
# limitations under the License.
from collections.abc import Iterable
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -14,8 +14,7 @@
# limitations under the License.
import math
from collections.abc import Iterable
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -15,8 +15,7 @@
"""OpenAI ImageGPT configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Mapping, Optional
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -19,8 +19,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Iterable
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -14,9 +14,8 @@
# limitations under the License.
import copy
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, Tuple, Union
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np
import torch

View File

@ -15,8 +15,7 @@
"""LayoutLM model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, List, Optional
from typing import Any, List, Mapping, Optional
from ... import PretrainedConfig, PreTrainedTokenizer
from ...onnx import OnnxConfig, PatchingSpec

View File

@ -15,8 +15,7 @@
"""LayoutLMv3 model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Mapping, Optional
from packaging import version

View File

@ -14,8 +14,7 @@
# limitations under the License.
"""Image processor class for LayoutLMv3."""
from collections.abc import Iterable
from typing import Dict, Optional, Union
from typing import Dict, Iterable, Optional, Union
import numpy as np

View File

@ -15,7 +15,7 @@
"""LeViT model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Mapping
from packaging import version

View File

@ -14,8 +14,7 @@
# limitations under the License.
"""Image processor class for LeViT."""
from collections.abc import Iterable
from typing import Dict, Optional, Union
from typing import Dict, Iterable, Optional, Union
import numpy as np

View File

@ -14,8 +14,7 @@
# limitations under the License.
"""Image processor class for LLaVa-NeXT."""
from collections.abc import Iterable
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -14,8 +14,7 @@
# limitations under the License.
"""Image processor class for LLaVa-Onevision."""
from collections.abc import Iterable
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, Iterable, List, Optional, Tuple, Union
import numpy as np

View File

@ -17,8 +17,7 @@ Processor class for LLaVa-Onevision.
"""
import math
from collections.abc import Iterable
from typing import List, Union
from typing import Iterable, List, Union
import numpy as np

View File

@ -15,8 +15,7 @@
"""Longformer configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, List, Optional, Union
from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Union
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxConfig

View File

@ -14,7 +14,7 @@
# limitations under the License.
"""LongT5 model configuration"""
from collections.abc import Mapping
from typing import Mapping
from ...configuration_utils import PretrainedConfig
from ...onnx import OnnxSeq2SeqConfigWithPast

View File

@ -15,8 +15,7 @@
"""M2M100 model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer
from ...configuration_utils import PretrainedConfig

View File

@ -15,8 +15,7 @@
"""Marian model configuration"""
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any, Optional
from typing import Any, Mapping, Optional
from ... import PreTrainedTokenizer
from ...configuration_utils import PretrainedConfig

View File

@ -15,11 +15,10 @@
import json
import sys
from argparse import ArgumentParser
from collections.abc import Iterator
from dataclasses import dataclass
from pathlib import Path
from pprint import pformat
from typing import Any, Dict, List, Set, Tuple
from typing import Any, Dict, Iterator, List, Set, Tuple
import requests
import torch

View File

@ -15,8 +15,7 @@
"""Image processor class for Mask2Former."""
import math
from collections.abc import Iterable
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
import numpy as np

View File

@ -14,11 +14,10 @@
# limitations under the License.
import sys
from argparse import ArgumentParser
from collections.abc import Iterator
from dataclasses import dataclass
from pathlib import Path
from pprint import pformat
from typing import Any, Dict, List, Set, Tuple
from typing import Any, Dict, Iterator, List, Set, Tuple
import requests
import torch

Some files were not shown because too many files have changed in this diff Show More