Compare commits


24 Commits

Author SHA1 Message Date
b356fce1da solve unequal cropping 2025-08-11 19:20:28 +02:00
af7f75e682 use existing methods, add default image 2025-08-11 16:44:06 +02:00
34ba5909a2 add an unnormalize image method 2025-08-11 16:43:27 +02:00
fbec904fb0 Merge branch 'main' into vision_visualizer 2025-08-06 19:19:09 +02:00
a1263dfe7b fixup 2025-08-06 19:17:38 +02:00
1878d6c4ff add captions and better tiling detection 2025-08-06 19:16:14 +02:00
cb2e0df2ec [image processor] fix glm4v (#39964)
* fix glm4v image process

* Update src/transformers/models/glm4v/image_processing_glm4v.py

---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
2025-08-06 17:46:58 +01:00
9ab75fc428 fix typo (#39936)
* fix typo

* fix modular instead

* fix

---------

Co-authored-by: y.korobko <y.korobko@tbank.ru>
2025-08-06 16:21:24 +00:00
43b3f58875 Fix grammatical error in MoE variable name: expert_hitted → expert_hit, hitted_experts → hit_experts (#39959)
* Fix grammatical error: expert_hitted -> expert_hit in MoE implementations

* Fix grammatical error: hitted_experts -> hit_experts in MoE implementation
2025-08-06 15:45:19 +00:00
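
For reference, a self-contained sketch (illustrative shapes only) of the MoE dispatch pattern in which the renamed variable appears; `expert_hit` lists the experts that received at least one token:

```python
import torch

num_experts, top_k, num_tokens = 8, 2, 5
router_logits = torch.randn(num_tokens, num_experts)
routing_weights, selected_experts = torch.topk(router_logits.softmax(dim=-1), top_k, dim=-1)

# One-hot mask of shape (num_experts, top_k, num_tokens); an expert is "hit" if any token routes to it.
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=num_experts).permute(2, 1, 0)
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()  # formerly `expert_hitted`
for expert_idx in expert_hit:
    # `top_x` indexes the tokens routed to this expert, `idx` their slot within the top_k selection.
    idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
```
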
dff6185d61 docs: fix typo in 'quantization-aware training' (#39904) 2025-08-06 14:52:43 +00:00
c7844c7a8e Enable gpt-oss mxfp4 on older hardware (sm75+) (#39940)
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-08-06 13:39:21 +00:00
dd70a8cb9d Fix MXFP4 quantizer validation to allow CPU inference with dequantize option (#39953)
* Fix MXFP4 quantizer validation to enable CPU dequantization

Move dequantize check before CUDA availability check to allow
CPU inference when quantization_config.dequantize is True.
This enables users to run MXFP4 models on CPU by automatically
converting them to BF16 format.

* Add tests for MXFP4 quantizer CPU dequantization validation

* fix: format mxfp4 test file with ruff
2025-08-06 15:20:41 +02:00
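
A minimal sketch of the behavior this commit enables, assuming `Mxfp4Config(dequantize=True)` as referenced in the quantizer diff further down; the repo id is only an example checkpoint:

```python
from transformers import AutoModelForCausalLM, Mxfp4Config

# With dequantize=True the MXFP4 weights are converted to bf16, so the model can run on CPU.
model = AutoModelForCausalLM.from_pretrained(
    "openai/gpt-oss-20b",  # example MXFP4-quantized checkpoint
    quantization_config=Mxfp4Config(dequantize=True),
    device_map="cpu",
)
```
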
82eb67e62a [docs] ko toc fix (#39927) 2025-08-06 10:12:34 +00:00
9e76a6bb54 circleci: pin torch 2.7.1 until torchcodec is updated (#39951)
circleci torch 2.7.1

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-06 11:18:00 +02:00
910b319357 Fix CI: Tests failing on CPU due to torch.device('cpu').index being None (#39933)
replace routing_weights.device.index with a
2025-08-06 10:22:43 +02:00
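
For context, `torch.device("cpu").index` is `None`, which is what made `int(routing_weights.device.index)` fail on CPU-only runs; a standalone sketch of the fallback applied in the diffs below:

```python
import torch

device = torch.device("cpu")
print(device.index)  # None, so int(device.index) raised a TypeError

# Fallback used in the load_balancing_loss_func fixes: treat a missing index as device 0.
device_index = device.index if device.index is not None else 0
rank = 2 * int(device_index)  # 2 stands in for routing_weights.shape[1] (top_k)
print(rank)  # 0
```
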
369c99d0ce Avoid utils/check_bad_commit.py failing due to rate limit (requesting api.github.com) (#39918)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-05 21:52:20 +02:00
b771e476a8 [CI] post-GptOss fixes for green CI (#39929) 2025-08-05 20:04:59 +02:00
eb6e26acf3 Dev version 2025-08-05 18:09:30 +02:00
a6a18efe53 better namings 2025-08-05 17:30:05 +02:00
e581d2f2ce fixup 2025-07-25 08:02:39 +00:00
1f6822d114 move processor visualizer 2025-07-25 07:58:35 +00:00
edb70ae15c Merge branch 'main' into vision_visualizer 2025-07-24 12:50:27 +00:00
27bc371bea Merge branch 'main' into vision_visualizer 2025-07-22 13:01:45 +02:00
58c619e809 draft the vision visualizer 2025-03-21 18:53:04 +01:00
91 changed files with 701 additions and 138 deletions

View File

@ -5,7 +5,7 @@ ARG REF=main
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --upgrade 'torch<2.8' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"

View File

@ -16,7 +16,7 @@ RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
RUN make install -j 10
RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache --upgrade 'torch<2.8' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
# spacy is not used so not tested. Causes to failures. TODO fix later

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ffmpeg
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
RUN uv pip uninstall transformers

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps timm accelerate
RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
RUN uv pip uninstall transformers

View File

@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-deps accelerate
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.8' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]"

View File

@ -5,7 +5,7 @@ USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ffmpeg
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
RUN uv pip uninstall transformers

View File

@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN git lfs install
RUN uv pip install --no-cache-dir pypi-kenlm

View File

@ -511,6 +511,8 @@
title: GPT2
- local: model_doc/gpt_bigcode
title: GPTBigCode
- local: model_doc/gpt_oss
title: GptOss
- local: model_doc/gptsan-japanese
title: GPTSAN Japanese
- local: model_doc/gpt-sw3
@ -617,8 +619,6 @@
title: OLMoE
- local: model_doc/open-llama
title: Open-Llama
- local: model_doc/openai_moe
title: OpenAIMoe
- local: model_doc/opt
title: OPT
- local: model_doc/pegasus

View File

@ -65,6 +65,10 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
[[autodoc]] HqqConfig
## Mxfp4Config
[[autodoc]] Mxfp4Config
## FbgemmFp8Config
[[autodoc]] FbgemmFp8Config

View File

@ -24,11 +24,11 @@ rendered properly in your Markdown viewer.
</div>
</div>
# OpenAIMoE
# GptOss
## Overview
The OpenAIMoE model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
The GptOss model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
<INSERT SHORT SUMMARY HERE>
The abstract from the paper is the following:
@ -43,16 +43,16 @@ This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface
The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
## OpenAIMoeConfig
## GptOssConfig
[[autodoc]] OpenAIMoeConfig
[[autodoc]] GptOssConfig
## OpenAIMoeModel
## GptOssModel
[[autodoc]] OpenAIMoeModel
[[autodoc]] GptOssModel
- forward
## OpenAIMoeForCausalLM
## GptOssForCausalLM
[[autodoc]] OpenAIMoeForCausalLM
[[autodoc]] GptOssForCausalLM
- forward

View File

@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
# FP-Quant
[FP-Quant](https://github.com/IST-DASLab/FP-Quant) is a family of quantization algorithms tailored for the Blackwell generation of Nvidia GPUs. The goal is to allow for efficient post-training quantization (PTQ) and quantization-aware trainin (QAT) of LLMs in the [MXFP4 and NVFP4 data-types](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf).
[FP-Quant](https://github.com/IST-DASLab/FP-Quant) is a family of quantization algorithms tailored for the Blackwell generation of Nvidia GPUs. The goal is to allow for efficient post-training quantization (PTQ) and quantization-aware training (QAT) of LLMs in the [MXFP4 and NVFP4 data-types](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf).
Currently, only PTQ with MXFP4 is supported. Models can either be quantized on the fly with `quantization_config=FPQuantConfig()`:
@ -63,4 +63,4 @@ model.forward = torch.compile(model.forward, mode="max-autotune", fullgraph=True
FP-Quant currently performs best for very large batch size processing.
See [QuTLASS README](https://github.com/IST-DASLab/qutlass/blob/main/README.md) for speedups.
See [QuTLASS README](https://github.com/IST-DASLab/qutlass/blob/main/README.md) for speedups.
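
A hedged usage sketch of the on-the-fly PTQ path mentioned above; the model id is a placeholder, `FPQuantConfig()` is assumed to default to MXFP4, and a Blackwell GPU with the QuTLASS kernels is required per the doc:

```python
from transformers import AutoModelForCausalLM, FPQuantConfig

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",            # placeholder model id
    quantization_config=FPQuantConfig(),  # on-the-fly MXFP4 post-training quantization
    device_map="cuda",
    torch_dtype="auto",
)
```
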

View File

@ -327,8 +327,6 @@
title: (번역중) Contribute new quantization method
title: (번역중) 경량화 메소드
- sections:
- local: performance
title: 성능 및 확장성
- local: in_translation
title: (번역중) Quantization
- local: llm_optims
@ -348,8 +346,6 @@
title: CPU에서 훈련
- local: perf_train_cpu_many
title: 다중 CPU에서 훈련하기
- local: perf_train_tpu_tf
title: TensorFlow로 TPU에서 훈련하기
- local: perf_train_special
title: Apple 실리콘에서 PyTorch 학습
- local: perf_hardware
@ -363,12 +359,8 @@
- local: perf_infer_gpu_one
title: 하나의 GPU를 활용한 추론
title: 추론 최적화하기
- local: big_models
title: 대형 모델을 인스턴스화
- local: debugging
title: 디버깅
- local: tf_xla
title: TensorFlow 모델을 위한 XLA 통합
- local: in_translation
title: (번역중) Optimize inference using `torch.compile()`
title: (번역중) 성능 및 확장성

View File

@ -60,7 +60,7 @@ from transformers.utils import check_min_version, send_example_telemetry
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
Array = Any
Dataset = datasets.arrow_dataset.Dataset

View File

@ -59,7 +59,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/flax/speech-recognition/requirements.txt")

View File

@ -55,7 +55,7 @@ from transformers.utils import check_min_version, send_example_telemetry
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
Array = Any
Dataset = datasets.arrow_dataset.Dataset

View File

@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

View File

@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")

View File

@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

View File

@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

View File

@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)

View File

@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

View File

@ -56,7 +56,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

View File

@ -61,7 +61,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")

View File

@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")

View File

@ -69,7 +69,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)

View File

@ -72,7 +72,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -74,7 +74,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)

View File

@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = logging.getLogger(__name__)

View File

@ -65,7 +65,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)
# You should update this to your particular problem to have better documentation of `model_type`

View File

@ -59,7 +59,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/object-detection/requirements.txt")

View File

@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logging.basicConfig(level=logging.INFO)
logger = get_logger(__name__)

View File

@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -45,7 +45,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")

View File

@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)

View File

@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

View File

@ -64,7 +64,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

View File

@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

View File

@ -67,7 +67,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

View File

@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

View File

@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

View File

@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

View File

@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)

View File

@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

View File

@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

View File

@ -67,7 +67,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

View File

@ -66,7 +66,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

View File

@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

View File

@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version(
"datasets>=1.8.0", "To fix: pip install -r examples/tensorflow/contrastive-image-text/requirements.txt"

View File

@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

View File

@ -49,7 +49,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = logging.getLogger(__name__)

View File

@ -61,7 +61,7 @@ except (ModuleNotFoundError, ImportError):
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
logger = logging.getLogger(__name__)

View File

@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
# region Checking dependencies
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

View File

@ -46,7 +46,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
task_to_keys = {
"cola": ("sentence", None),

View File

@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
# region Dependencies and constants
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.55.0.dev0")
check_min_version("4.56.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

View File

@ -463,7 +463,7 @@ install_requires = [
setup(
name="transformers",
version="4.55.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="4.56.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",

View File

@ -18,7 +18,7 @@
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).
__version__ = "4.55.0.dev0"
__version__ = "4.56.0.dev0"
from pathlib import Path
from typing import TYPE_CHECKING

View File

@ -452,6 +452,105 @@ def normalize(
return image
def unnormalize(
image: Union[np.ndarray, "torch.Tensor"],
mean: Union[float, Collection[float]],
std: Union[float, Collection[float]],
data_format: Optional[ChannelDimension] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
):
"""
Inverse of `normalize`:
image = image * std + mean
Accepts NumPy arrays or PyTorch tensors and mirrors `normalize`'s API,
but also handles 4D/5D by broadcasting along the channel axis and
collapsing leading batch dims. Defaults to NHWC output for visualization.
"""
# type check
is_np = isinstance(image, np.ndarray)
is_torch = isinstance(image, torch.Tensor)
if not (is_np or is_torch):
raise TypeError("image must be a numpy array or a torch tensor")
# infer layout
if input_data_format is None:
input_data_format = infer_channel_dimension_format(image)
# cast policy (match normalize): cast only if not floating
if is_np:
if not np.issubdtype(image.dtype, np.floating):
image = image.astype(np.float32)
else:
if not image.is_floating_point():
image = image.float()
# channel axis and sizes
ch_axis = get_channel_dimension_axis(image, input_data_format=input_data_format)
num_channels = int(image.shape[ch_axis])
# normalize mean/std to per-channel vectors
def _as_seq(x, n):
if isinstance(x, Collection):
if len(x) != n:
raise ValueError(f"value must have {n} elements if it is an iterable, got {len(x)}")
return x
return [x] * n
mean_seq = _as_seq(mean, num_channels)
std_seq = _as_seq(std, num_channels)
# make broadcastable tensors/arrays shaped [1, ..., C (at ch_axis), ..., 1]
bshape = [1] * image.ndim
bshape[ch_axis] = num_channels
if is_np:
mean_arr = np.asarray(mean_seq, dtype=image.dtype).reshape(bshape)
std_arr = np.asarray(std_seq, dtype=image.dtype).reshape(bshape)
image = image * std_arr + mean_arr
else:
mean_arr = torch.as_tensor(mean_seq, dtype=image.dtype, device=image.device).view(bshape)
std_arr = torch.as_tensor(std_seq, dtype=image.dtype, device=image.device).view(bshape)
image = image * std_arr + mean_arr
# convert to numpy for plotting
if is_torch:
image = image.detach().cpu().numpy()
is_np = True # from here on
# target layout: default to NHWC so downstream viz works out of the box
target_format = data_format or ChannelDimension.LAST
# collapse any leading batch dims into one, preserving (C,H,W) or (H,W,C)
if input_data_format == ChannelDimension.FIRST:
# layout: [*, C, H, W]
lead = int(np.prod(image.shape[: image.ndim - 3])) if image.ndim > 3 else 1
if image.ndim == 3:
c, h, w = image.shape
image = image.reshape(1, c, h, w)
lead = 1
else:
c, h, w = image.shape[-3:]
image = image.reshape(lead, c, h, w)
if target_format == ChannelDimension.LAST:
image = np.transpose(image, (0, 2, 3, 1)) # -> [N, H, W, C]
else:
# layout: [*, H, W, C]
lead = int(np.prod(image.shape[: image.ndim - 3])) if image.ndim > 3 else 1
if image.ndim == 3:
h, w, c = image.shape
image = image.reshape(1, h, w, c)
lead = 1
else:
h, w, c = image.shape[-3:]
image = image.reshape(lead, h, w, c)
if target_format == ChannelDimension.FIRST:
image = np.transpose(image, (0, 3, 1, 2)) # -> [N, C, H, W]
return image
def center_crop(
image: np.ndarray,
size: tuple[int, int],

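A standalone NumPy check of the math implemented by `unnormalize` above: `image * std + mean` undoes `normalize` per channel (channels-first input here, values illustrative):

```python
import numpy as np

mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

original = np.random.rand(3, 4, 4).astype(np.float32)               # (C, H, W) pixel values in [0, 1]
normalized = (original - mean[:, None, None]) / std[:, None, None]  # what `normalize` does
restored = normalized * std[:, None, None] + mean[:, None, None]    # what `unnormalize` does

assert np.allclose(restored, original, atol=1e-5)
```
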
View File

@ -264,8 +264,8 @@ def routing_torch_dist(
expt_data = compute_expt_data_torch(hist, n_local_experts, n_gates_pad)
hitted_experts = n_expts_act
return RoutingData(gate_scal, hist, n_local_experts, hitted_experts, expt_data), gather_indx, scatter_indx
hit_experts = n_expts_act
return RoutingData(gate_scal, hist, n_local_experts, hit_experts, expt_data), gather_indx, scatter_indx
def mlp_forward(self, hidden_states):
@ -280,7 +280,10 @@ def mlp_forward(self, hidden_states):
batch_size = hidden_states.shape[0]
hidden_states = hidden_states.reshape(-1, self.router.hidden_dim)
router_logits = nn.functional.linear(hidden_states, self.router.weight, self.router.bias)
routing_data, gather_idx, scatter_idx = routing(router_logits, self.router.top_k)
with torch.cuda.device(router_logits.device):
routing_data, gather_idx, scatter_idx = routing(router_logits, self.router.top_k)
routed_out = self.experts(hidden_states, routing_data, gather_idx, scatter_idx)
routed_out = routed_out.reshape(batch_size, -1, self.router.hidden_dim)
return routed_out, router_logits

View File

@ -356,8 +356,8 @@ class Ernie4_5_MoeSparseMoeBlock(nn.Module):
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
# Loop over all available experts in the model and perform the computation on each expert
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))

View File

@ -167,8 +167,8 @@ class Ernie4_5_MoeSparseMoeBlock(nn.Module):
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
# Loop over all available experts in the model and perform the computation on each expert
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))

View File

@ -141,7 +141,7 @@ class Glm4vImageProcessor(BaseImageProcessor):
super().__init__(**kwargs)
if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
else:
elif size is None:
size = {"shortest_edge": 112 * 112, "longest_edge": 28 * 28 * 15000}
self.size = size
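
A simplified, standalone sketch of why `elif size is None:` matters here: the previous bare `else:` overwrote a valid user-supplied `size` with the defaults:

```python
def resolve_size(size):
    if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
        raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
    elif size is None:  # old code: bare `else`, which replaced a valid size with the defaults
        size = {"shortest_edge": 112 * 112, "longest_edge": 28 * 28 * 15000}
    return size

assert resolve_size({"shortest_edge": 256, "longest_edge": 1024})["shortest_edge"] == 256
```
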

View File

@ -75,7 +75,7 @@ class GptOssExperts(nn.Module):
def forward(self, hidden_states: torch.Tensor, router_indices=None, routing_weights=None) -> torch.Tensor:
"""
When training is is more efficient to just loop over the experts and compute the output for each expert
When training it is more efficient to just loop over the experts and compute the output for each expert
as otherwise the memory would explode.
For inference we can sacrifice some memory and compute the output for all experts at once. By repeating the inputs.
@ -97,8 +97,8 @@ class GptOssExperts(nn.Module):
expert_mask = expert_mask.permute(2, 1, 0)
# we sum on the top_k and on the sequence lenght to get which experts
# are hit this time around
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted[:]:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit[:]:
with torch.no_grad():
_, token_idx = torch.where(expert_mask[expert_idx[0]])
current_state = hidden_states[token_idx]

View File

@ -73,7 +73,7 @@ class GptOssExperts(nn.Module):
def forward(self, hidden_states: torch.Tensor, router_indices=None, routing_weights=None) -> torch.Tensor:
"""
When training is is more efficient to just loop over the experts and compute the output for each expert
When training it is more efficient to just loop over the experts and compute the output for each expert
as otherwise the memory would explode.
For inference we can sacrifice some memory and compute the output for all experts at once. By repeating the inputs.
@ -95,8 +95,8 @@ class GptOssExperts(nn.Module):
expert_mask = expert_mask.permute(2, 1, 0)
# we sum on the top_k and on the sequence lenght to get which experts
# are hit this time around
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted[:]:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit[:]:
with torch.no_grad():
_, token_idx = torch.where(expert_mask[expert_idx[0]])
current_state = hidden_states[token_idx]

View File

@ -40,7 +40,7 @@ if is_torch_flex_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.jetmoe.modeling_jetmoe.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,
@ -119,7 +119,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -1647,7 +1647,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -918,7 +918,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -67,7 +67,7 @@ is_fast_path_available = all(
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func with gate->router
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func with gate->router
def load_balancing_loss_func(
router_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,
@ -148,7 +148,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -50,7 +50,7 @@ if is_flash_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,
@ -129,7 +129,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -465,8 +465,8 @@ class MiniMaxSparseMoeBlock(nn.Module):
# this will be used to easily index which expert is going to be sollicitated
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
# Index the correct hidden states and compute the expert hidden state for

View File

@ -121,8 +121,8 @@ class MixtralSparseMoeBlock(nn.Module):
# this will be used to easily index which expert is going to be sollicitated
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
# Index the correct hidden states and compute the expert hidden state for

View File

@ -201,8 +201,8 @@ class MixtralSparseMoeBlock(nn.Module):
# this will be used to easily index which expert is going to be sollicitated
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
# Index the correct hidden states and compute the expert hidden state for

View File

@ -39,7 +39,7 @@ if is_flash_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,
@ -118,7 +118,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -55,7 +55,7 @@ _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_m
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,
@ -134,7 +134,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)

View File

@ -59,7 +59,6 @@ if is_torch_flex_attn_available():
logger = logging.get_logger(__name__)
# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
def load_balancing_loss_func(
gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
num_experts: Optional[int] = None,
@ -138,7 +137,8 @@ def load_balancing_loss_func(
router_per_expert_attention_mask, dim=0
)
rank = routing_weights.shape[1] * int(routing_weights.device.index)
device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
rank = routing_weights.shape[1] * int(device_index)
overall_loss = torch.sum(
tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
)
@ -621,8 +621,8 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
# Loop over all available experts in the model and perform the computation on each expert
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))

View File

@ -244,8 +244,8 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
# Loop over all available experts in the model and perform the computation on each expert
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))

View File

@ -100,8 +100,8 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
# Loop over all available experts in the model and perform the computation on each expert
expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hitted:
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
expert_layer = self.experts[expert_idx]
idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))

View File

@ -56,33 +56,44 @@ class Mxfp4HfQuantizer(HfQuantizer):
"Using mxfp4 quantization requires torch"
"Please install the latest version of torch ( pip install --upgrade torch )"
)
if self.quantization_config.dequantize:
return
if not torch.cuda.is_available():
raise RuntimeError("Using MXFP4 quantized models requires a GPU")
if not is_accelerate_available():
raise ImportError("Using mxfp4 requires Accelerate: `pip install accelerate`")
if self.quantization_config.dequantize:
return
compute_capability = torch.cuda.get_device_capability()
major, minor = compute_capability
gpu_is_supported = compute_capability >= (7, 5)
kernels_available = is_triton_available("3.4.0") and is_triton_kernels_availalble()
if not is_triton_available("3.4.0") or not is_triton_kernels_availalble():
if self.pre_quantized and not self.quantization_config.dequantize:
if self.pre_quantized:
# On unsupported GPUs or without kernels, we will dequantize the model to bf16
if not gpu_is_supported:
logger.warning_once(
"MXFP4 quantization is only supported on GPUs with compute capability >= 7.5 (e.g T4, A100, L4, H100, or B200). "
"We will default to dequantizing the model to bf16."
)
self.quantization_config.dequantize = True
return
if not kernels_available:
logger.warning_once(
"MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed, we will default to dequantizing the model to bf16"
)
self.quantization_config.dequantize = True
return
else:
# we can't quantize the model in this case so we raise an error
raise ValueError("MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed")
if major < 9:
elif not gpu_is_supported:
# we can't quantize the model in this case so we raise an error
raise ValueError(
"MXFP4 quantized models is only supported on GPUs with compute capability >= 9.0 (e.g H100, or B100)"
"MXFP4 quantization is only supported on GPUs with compute capability >= 7.5 (e.g T4, A100, L4, H100, or B200)"
)
elif not kernels_available:
# we can't quantize the model in this case so we raise an error
raise ValueError("MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed")
device_map = kwargs.get("device_map", None)
if device_map is None:

View File

@ -0,0 +1,373 @@
import re
from typing import Optional, Union
import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
from PIL import Image
from ..image_transforms import convert_to_rgb, to_pil_image, unnormalize
from ..models.auto import AutoConfig, AutoProcessor
# Architectures that are incompatible with this util and should raise immediately:
INCOMPATIBLE_MODELS = [
"bit",
"colpali",
"colqwen2",
"convnext",
"d_fine",
"data2vec",
"efficientloftr",
"efficientnet",
"fuyu",
"gemma3",
"glm4v",
"glpn",
"hgnet_v2",
"hiera",
"internvl",
"janus",
"layoutlmv3",
"levit",
"lightglue",
"llama4",
"mistral3",
"mllama",
"mobilevit",
"mobilevitv2",
"musicgen",
"musicgen_melody",
"oneformer",
"perceiver",
"perception_lm",
"phi4_multimodal",
"qwen2_5_omni",
"qwen2_5_vl",
"qwen2_vl",
"regnet",
"resnet",
"superglue",
"superpoint",
"swin2sr",
"timm_wrapper",
"tvp",
"udop",
"vitmatte",
"vitpose",
"vjepa2",
"whisper",
]
DEFAULT_IMAGE_URL = (
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/hf-logo-224x224.png"
)
def _looks_like_global(tile: np.ndarray, base: Image.Image, *, mae_tol: float = 0.3) -> bool:
"""
Very simple heuristic: resize the base image to the tile's resolution and treat the tile as the
global/original view when the mean absolute error between them is below `mae_tol`.
"""
base_r = base.convert("RGB").resize(tile.shape[:2][::-1], Image.BILINEAR)
base_np = np.asarray(base_r).astype(np.float32) / 255.0
tile_f32 = tile.astype(np.float32)
if tile_f32.max() > 1.5:
tile_f32 /= 255.0
mae = np.abs(tile_f32 - base_np).mean()
return mae < mae_tol
class ImageVisualizer:
def __init__(self, repo_id: str):
self.processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=False)
self.config = AutoConfig.from_pretrained(repo_id, trust_remote_code=False)
if hasattr(self.processor, "image_processor"):
image_processor = self.processor.image_processor
elif hasattr(self.processor, "image_mean"):
image_processor = self.processor # weak test, but works most of the time
else:
raise ValueError(f"No image processor found for {repo_id}.")
self.channel_means = getattr(image_processor, "image_mean", [0.485, 0.456, 0.406])
self.channel_stds = getattr(image_processor, "image_std", [0.229, 0.224, 0.225])
if hasattr(self.processor, "image_token"):
self.image_token_marker = self.processor.image_token
elif hasattr(self.processor, "image_token_id"):
self.image_token_marker = self.processor.decode(self.processor.image_token_id)
else:
self.image_token_marker = "<image>"
self.default_prompt = f"{self.image_token_marker} How does it look?"
self.vision_config = getattr(self.config, "vision_config", None)
self.patch_size = getattr(self.vision_config, "patch_size", getattr(image_processor, "patch_size", 14))
self.merge_size = getattr(image_processor, "merge_size", 1)
def _pixel_values_as_tensor(
self, pixel_values: Union[torch.Tensor, np.ndarray, list[np.ndarray], list[torch.Tensor]]
):
"""
Normalize input to a 4D tensor with shape (batch, channels, height, width).
Supports input of shape:
- (B, C, H, W)
- (B, N, C, H, W) -> flattened to (B*N, C, H, W)
- (C, H, W) -> expanded to (1, C, H, W)
- list/tuple of arrays or tensors
"""
if isinstance(pixel_values, (list, tuple)):
tensor_list = [pv if isinstance(pv, torch.Tensor) else torch.tensor(pv) for pv in pixel_values]
pixel_values = torch.stack(tensor_list, dim=0)
if not isinstance(pixel_values, torch.Tensor):
pixel_values = torch.tensor(pixel_values)
if pixel_values.ndim == 5:
batch_size, num_images, num_channels, height, width = pixel_values.shape
pixel_values = pixel_values.view(batch_size * num_images, num_channels, height, width)
elif pixel_values.ndim == 4:
pass
elif pixel_values.ndim == 3:
pixel_values = pixel_values.unsqueeze(0)
else:
raise ValueError(f"Unexpected pixel tensor shape {pixel_values.shape}")
return pixel_values
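
Illustrative shapes for the normalization `_pixel_values_as_tensor` performs (random placeholder tensors):

```python
import torch

batched = torch.rand(2, 3, 3, 224, 224)        # (B, N, C, H, W): two samples, three tiles each
flat = batched.view(2 * 3, 3, 224, 224)        # -> (B*N, C, H, W), the layout the visualizer plots
single = torch.rand(3, 224, 224).unsqueeze(0)  # (C, H, W) -> (1, C, H, W)
print(flat.shape, single.shape)
```
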
def _display_single_image(self, image_array: np.ndarray, show_patch_grid: bool, figsize=(7, 7)):
plt.figure(figsize=figsize)
plt.imshow(image_array)
plt.xticks([])
plt.yticks([])
if show_patch_grid:
height, width = image_array.shape[:2]
step = max(1, min(height, width) // self.patch_size)
for x_pos in range(0, width, step):
plt.axvline(x_pos, color="red", linewidth=0.5)
for y_pos in range(0, height, step):
plt.axhline(y_pos, color="red", linewidth=0.5)
caption = f"{width}×{height} | mean={', '.join(f'{m:.3f}' for m in self.channel_means)} | std={', '.join(f'{s:.3f}' for s in self.channel_stds)}"
plt.tight_layout()
plt.figtext(0.5, -0.02, caption, ha="center", va="top", fontsize=12)
plt.show()
def _display_tiled_images(
self,
tiles_array: np.ndarray,
source_image: Image.Image,
rows: Optional[int] = None,
cols: Optional[int] = None,
aspect_ratio: float = 1.0,
add_grid: bool = True,
figsize=(7, 7),
):
"""
Display a grid of image tiles. Attempts to detect and preserve the original/global image tile,
which is then shown separately at the end.
"""
num_tiles = tiles_array.shape[0]
original_tile_index = None
saved_original_tile = None
for idx in (0, num_tiles - 1):
if _looks_like_global(tiles_array[idx], source_image):
original_tile_index = idx
break
if original_tile_index is not None:
saved_original_tile = tiles_array[original_tile_index]
tiles_array = np.delete(tiles_array, original_tile_index, axis=0)
num_tiles -= 1
# Infer grid if not specified
grid_rows, grid_cols = rows, cols
if grid_rows is None or grid_cols is None:
if aspect_ratio >= 1:
guessed_cols = int(np.ceil(np.sqrt(num_tiles * aspect_ratio)))
guessed_rows = int(np.ceil(num_tiles / max(guessed_cols, 1)))
else:
guessed_rows = int(np.ceil(np.sqrt(num_tiles / max(aspect_ratio, 1e-8))))
guessed_cols = int(np.ceil(num_tiles / max(guessed_rows, 1)))
grid_rows = grid_rows if grid_rows is not None else guessed_rows
grid_cols = grid_cols if grid_cols is not None else guessed_cols
fig, axes = plt.subplots(grid_rows, grid_cols, figsize=figsize, squeeze=False)
tile_index = 0
for row_idx in range(grid_rows):
for col_idx in range(grid_cols):
ax = axes[row_idx, col_idx]
if tile_index < num_tiles:
tile_image = tiles_array[tile_index]
ax.imshow(tile_image)
ax.set_xticks([])
ax.set_yticks([])
if add_grid:
height, width = tile_image.shape[:2]
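# Same patch-grid overlay as in `_display_single_image`, applied per tile.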
step = max(1, min(height, width) // self.patch_size)
for x_pos in range(0, width, step):
ax.axvline(x_pos, color="red", linewidth=0.5)
for y_pos in range(0, height, step):
ax.axhline(y_pos, color="red", linewidth=0.5)
else:
ax.axis("off")
tile_index += 1
unique = sorted({f"{t.shape[1]}×{t.shape[0]}" for t in tiles_array})
sizes = ", ".join(unique)
caption = f"{tiles_array.shape[0]} patches | {sizes} | mean={', '.join(f'{m:.3f}' for m in self.channel_means)} | std={', '.join(f'{s:.3f}' for s in self.channel_stds)}"
plt.tight_layout()
fig.text(0.5, 0.02, caption, ha="center", va="bottom", fontsize=12)
plt.show()
if saved_original_tile is not None:
fig2, ax2 = plt.subplots(figsize=figsize)
ax2.imshow(saved_original_tile)
ax2.set_xticks([])
ax2.set_yticks([])
ax2.set_aspect("equal", adjustable="box")
fig2.subplots_adjust(left=0, right=1, top=1, bottom=0) # no clipping
h0, w0 = saved_original_tile.shape[:2]
caption = f"{w0}×{h0} | mean={', '.join(f'{m:.3f}' for m in self.channel_means)} | std={', '.join(f'{s:.3f}' for s in self.channel_stds)}"
fig2.text(0.5, 0.02, caption, ha="center", va="bottom", fontsize=12)
plt.show()
def default_message(self, full_output: bool = False) -> str:
"""
Build a single formatted prompt string using the processor's chat template.
Contains one image (HF logo) and one user text message.
If available, adds the generation prompt as well.
Falls back to a minimal image-token prompt if no chat template is available.
"""
# ensure this is a multimodal processor with image + tokenizer
if not (
hasattr(self.processor, "attributes")
and "image_processor" in self.processor.attributes
and "tokenizer" in self.processor.attributes
):
raise RuntimeError(
"Processor does not expose both 'image_processor' and 'tokenizer'; cannot build multimodal example."
)
conversation = [
{
"role": "user",
"content": [
{
"type": "image",
"url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/hf-logo-224x224.png",
},
{"type": "text", "text": "Please describe this image."},
],
}
]
try:
print("For a 224x224 RGB png image: \n")
decoded_message = self.processor.batch_decode(
self.processor.apply_chat_template(
conversation,
add_generation_prompt=True,
tokenize=True,
return_dict=False,
truncation=False,
),
skip_special_tokens=False,
)[0]
image_token_string = getattr(self.processor, "image_token", "<image>")
token_escaped = re.escape(image_token_string)
image_token_run_pattern = re.compile(rf"(?:{token_escaped})(?:\s*{token_escaped}){{2,}}")
def compress_image_token_run(match: re.Match) -> str:
n_tokens = match.group(0).count(image_token_string)
return f"{image_token_string}[...{n_tokens} tokens...]{image_token_string}"
if full_output:
return decoded_message
else:
return image_token_run_pattern.sub(compress_image_token_run, decoded_message)
except ValueError:
image_token_string = getattr(
self.processor,
"image_token",
getattr(getattr(self.processor, "tokenizer", None), "image_token", "<image>"),
)
return f"{image_token_string} {'Please describe this image.'}"
def visualize(
self,
images: Optional[Union[Image.Image, np.ndarray, str, list[Union[Image.Image, np.ndarray, str]]]] = None,
rows: Optional[int] = None,
cols: Optional[int] = None,
add_grid: bool = True,
figsize=(12, 12),
):
"""
Visualize an image as the processor prepares it for the model. Only a single image is supported.
If the processor splits the image into multiple tiles, they are displayed in a grid with an optional patch-grid overlay.
"""
if images is None:
images = Image.open(requests.get(DEFAULT_IMAGE_URL, stream=True).raw)
if not isinstance(images, list):
images = [images]
else:
if len(images) > 1:
raise ValueError(
"You passed a list of several images. Only single images are accepted by the visualizer."
)
pil_images = [convert_to_rgb(to_pil_image(x)) for x in images]
img_width, img_height = pil_images[0].size
aspect_ratio = img_width / max(img_height, 1)
processed_inputs = self.processor(images=pil_images, text=self.default_prompt, return_tensors="pt")
pixel_values = processed_inputs["pixel_values"]
unnormalized = unnormalize(pixel_values, mean=self.channel_means, std=self.channel_stds)
if unnormalized.ndim == 3 or unnormalized.shape[0] == 1:
self._display_single_image(
unnormalized[0] if unnormalized.ndim == 4 else unnormalized,
show_patch_grid=add_grid,
figsize=figsize,
)
return
elif unnormalized.ndim != 4:
raise ValueError(f"Unsupported shape after unnormalization: {unnormalized.shape}")
num_tiles = unnormalized.shape[0]
if rows is None or cols is None:
tile_h, tile_w = unnormalized.shape[1:3]
tile_aspect = tile_w / tile_h if tile_h > 0 else 1.0
target_aspect = aspect_ratio / tile_aspect
best_rows, best_cols = 1, num_tiles
min_diff = float("inf")
for r in range(1, num_tiles + 1):
c = int(np.ceil(num_tiles / r))
diff = abs((c / r) - target_aspect)
if diff < min_diff:
min_diff = diff
best_rows, best_cols = r, c
rows = best_rows
cols = best_cols
self._display_tiled_images(
unnormalized,
pil_images[0],
rows=rows,
cols=cols,
aspect_ratio=aspect_ratio,
add_grid=add_grid,
figsize=figsize,
)
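# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; the class name `ImageVisualizer`
# and the checkpoint id are assumptions, not part of the original file):
#
#   viz = ImageVisualizer("Qwen/Qwen2-VL-2B-Instruct")
#   print(viz.default_message())   # preview the templated multimodal prompt
#   viz.visualize()                # render the module's default image with the patch grid
# ---------------------------------------------------------------------------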

View File

@ -107,18 +107,31 @@ class Mxfp4QuantizerTest(unittest.TestCase):
def test_quantizer_validation_low_compute_capability(self):
"""Test quantizer validation with low compute capability"""
with patch("torch.cuda.get_device_capability", return_value=(8, 0)):
with patch("torch.cuda.get_device_capability", return_value=(7, 0)):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
config = Mxfp4Config()
quantizer = Mxfp4HfQuantizer(config)
quantizer.pre_quantized = False
with self.assertRaises(ValueError):
quantizer.validate_environment()
def test_quantizer_validation_low_compute_capability_with_prequantized(self):
"""Test quantizer validation with low compute capability"""
with patch("torch.cuda.get_device_capability", return_value=(7, 0)):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
config = Mxfp4Config()
quantizer = Mxfp4HfQuantizer(config)
with self.assertRaises(ValueError):
quantizer.validate_environment()
# Should automatically set dequantize=True and warn
quantizer.validate_environment()
self.assertTrue(quantizer.quantization_config.dequantize)
def test_quantizer_validation_low_compute_capability_with_dequantize(self):
"""Test quantizer validation with low compute capability but dequantize enabled"""
with patch("torch.cuda.get_device_capability", return_value=(8, 0)):
with patch("torch.cuda.get_device_capability", return_value=(7, 0)):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
config = Mxfp4Config(dequantize=True)
@ -131,6 +144,52 @@ class Mxfp4QuantizerTest(unittest.TestCase):
if "compute capability" in str(e):
self.fail("Should not raise compute capability error when dequantize=True")
def test_quantizer_validation_dequantize_on_cpu(self):
"""Test quantizer validation with dequantize enabled on CPU-only environment"""
with patch("torch.cuda.is_available", return_value=False):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
config = Mxfp4Config(dequantize=True)
quantizer = Mxfp4HfQuantizer(config)
# Should not raise error when dequantize=True even without CUDA
try:
quantizer.validate_environment()
except RuntimeError as e:
if "requires a GPU" in str(e):
self.fail("Should not raise GPU requirement error when dequantize=True on CPU")
def test_quantizer_validation_order_dequantize_before_cuda_check(self):
"""Test that dequantize check happens before CUDA availability check"""
# Mock both torch.cuda.is_available and is_accelerate_available to return False
with (
patch("torch.cuda.is_available", return_value=False),
patch(
"transformers.quantizers.quantizer_mxfp4.is_accelerate_available",
return_value=False,
),
):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer
# Test with dequantize=True - should pass even without CUDA and accelerate
config = Mxfp4Config(dequantize=True)
quantizer = Mxfp4HfQuantizer(config)
# This should not raise any error because dequantize check comes first
try:
quantizer.validate_environment()
except (RuntimeError, ImportError) as e:
if "requires a GPU" in str(e) or "requires Accelerate" in str(e):
self.fail(f"Should not raise error when dequantize=True: {e}")
# Test with dequantize=False - should still fail due to missing CUDA
config = Mxfp4Config(dequantize=False)
quantizer = Mxfp4HfQuantizer(config)
with self.assertRaises(RuntimeError) as context:
quantizer.validate_environment()
self.assertIn("requires a GPU", str(context.exception))
def test_quantizer_validation_missing_triton(self):
"""Test quantizer validation when triton is not available"""
with (

View File

@ -171,6 +171,12 @@ if __name__ == "__main__":
print(f"start_commit: {args.start_commit}")
print(f"end_commit: {args.end_commit}")
# `get_commit_info` uses `requests.get()` to fetch commit info from `api.github.com` without a token.
# If a workflow run has many new failed tests, this script may eventually fail with a `KeyError` at
# `pr_number = pr_info_for_commit[0]["number"]` once the rate limit is hit.
# Cache the commit info and reuse it whenever possible.
commit_info_cache = {}
if len({args.test is None, args.file is None}) != 2:
raise ValueError("Exactly one argument `test` or `file` must be specified.")
@ -191,7 +197,14 @@ if __name__ == "__main__":
for test in failed_tests:
commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
info = {"test": test, "commit": commit}
info.update(get_commit_info(commit))
if commit in commit_info_cache:
commit_info = commit_info_cache[commit]
else:
commit_info = get_commit_info(commit)
commit_info_cache[commit] = commit_info
info.update(commit_info)
failed_tests_with_bad_commits.append(info)
# If no single-gpu test failures, remove the key

View File

@ -345,6 +345,8 @@ SPECIAL_CASES_TO_ALLOW.update(
"IdeficsConfig": True,
"IdeficsVisionConfig": True,
"IdeficsPerceiverConfig": True,
# TODO: @Arthur/Joao (`hidden_act` unused)
"GptOssConfig": True,
}
)