mirror of
https://github.com/huggingface/transformers.git
synced 2025-10-21 17:48:57 +08:00
Compare commits
24 Commits
v4.55.0 ... vision_vis
Commits:
b356fce1da, af7f75e682, 34ba5909a2, fbec904fb0, a1263dfe7b, 1878d6c4ff,
cb2e0df2ec, 9ab75fc428, 43b3f58875, dff6185d61, c7844c7a8e, dd70a8cb9d,
82eb67e62a, 9e76a6bb54, 910b319357, 369c99d0ce, b771e476a8, eb6e26acf3,
a6a18efe53, e581d2f2ce, 1f6822d114, edb70ae15c, 27bc371bea, 58c619e809
@@ -5,7 +5,7 @@ ARG REF=main
 RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
-RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir --upgrade 'torch<2.8' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
 # tensorflow pin matching setup.py
 RUN uv pip install --no-cache-dir pypi-kenlm
 RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
@@ -16,7 +16,7 @@ RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
 RUN make install -j 10
 
 
-RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache --upgrade 'torch<2.8' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
 # spacy is not used so not tested. Causes to failures. TODO fix later
@@ -5,7 +5,7 @@ USER root
 RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ffmpeg
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
 RUN uv pip uninstall transformers
@@ -5,7 +5,7 @@ USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir --no-deps timm accelerate
 RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
 # RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
@@ -5,7 +5,7 @@ USER root
 RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
 RUN uv pip uninstall transformers
@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
 RUN uv pip install --no-deps accelerate
-RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir 'torch<2.8' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]"
 
 
@@ -5,7 +5,7 @@ USER root
 RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ffmpeg
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
 RUN uv pip uninstall transformers
@@ -7,7 +7,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de
 ENV UV_PYTHON=/usr/local/bin/python
 RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
 RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-cache-dir 'torch<2.8' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
 RUN git lfs install
 
 RUN uv pip install --no-cache-dir pypi-kenlm
@@ -511,6 +511,8 @@
         title: GPT2
       - local: model_doc/gpt_bigcode
         title: GPTBigCode
+      - local: model_doc/gpt_oss
+        title: GptOss
       - local: model_doc/gptsan-japanese
         title: GPTSAN Japanese
       - local: model_doc/gpt-sw3
@@ -617,8 +619,6 @@
         title: OLMoE
       - local: model_doc/open-llama
         title: Open-Llama
-      - local: model_doc/openai_moe
-        title: OpenAIMoe
       - local: model_doc/opt
         title: OPT
       - local: model_doc/pegasus
@@ -65,6 +65,10 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
 
 [[autodoc]] HqqConfig
 
+## Mxfp4Config
+
+[[autodoc]] Mxfp4Config
+
 ## FbgemmFp8Config
 
 [[autodoc]] FbgemmFp8Config
@@ -24,11 +24,11 @@ rendered properly in your Markdown viewer.
 </div>
 </div>
 
-# OpenAIMoE
+# GptOss
 
 ## Overview
 
-The OpenAIMoE model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
+The GptOss model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
 <INSERT SHORT SUMMARY HERE>
 
 The abstract from the paper is the following:
@@ -43,16 +43,16 @@ This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface
 The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
 
 
-## OpenAIMoeConfig
+## GptOssConfig
 
-[[autodoc]] OpenAIMoeConfig
+[[autodoc]] GptOssConfig
 
-## OpenAIMoeModel
+## GptOssModel
 
-[[autodoc]] OpenAIMoeModel
+[[autodoc]] GptOssModel
     - forward
 
-## OpenAIMoeForCausalLM
+## GptOssForCausalLM
 
-[[autodoc]] OpenAIMoeForCausalLM
+[[autodoc]] GptOssForCausalLM
     - forward
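The rename is purely mechanical; the model stays reachable through the Auto classes once the template is filled in. A minimal generation sketch, assuming a hypothetical `openai/gpt-oss-20b` checkpoint (the id is not taken from this diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "openai/gpt-oss-20b"  # assumed checkpoint id, for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Mixture-of-experts models scale well because", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```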
@@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
 
 # FP-Quant
 
-[FP-Quant](https://github.com/IST-DASLab/FP-Quant) is a family of quantization algorithms tailored for the Blackwell generation of Nvidia GPUs. The goal is to allow for efficient post-training quantization (PTQ) and quantization-aware trainin (QAT) of LLMs in the [MXFP4 and NVFP4 data-types](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf).
+[FP-Quant](https://github.com/IST-DASLab/FP-Quant) is a family of quantization algorithms tailored for the Blackwell generation of Nvidia GPUs. The goal is to allow for efficient post-training quantization (PTQ) and quantization-aware training (QAT) of LLMs in the [MXFP4 and NVFP4 data-types](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf).
 
 Currently, only PTQ with MXFP4 is supported. Models can either be quantized on the fly with `quantization_config=FPQuantConfig()`:
 
@@ -63,4 +63,4 @@ model.forward = torch.compile(model.forward, mode="max-autotune", fullgraph=True
 
 FP-Quant currently performs best for very large batch size processing.
 
-See [QuTLASS README](https://github.com/IST-DASLab/qutlass/blob/main/README.md) for speedups.
\ No newline at end of file
+See [QuTLASS README](https://github.com/IST-DASLab/qutlass/blob/main/README.md) for speedups.
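The on-the-fly path mentioned in the doc can be sketched as follows; the checkpoint id is illustrative, and per the text above this assumes a Blackwell-class GPU with the QuTLASS kernels installed:

```python
import torch
from transformers import AutoModelForCausalLM, FPQuantConfig

# Quantize to MXFP4 at load time (model id is an assumption, not from this diff).
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    quantization_config=FPQuantConfig(),
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)
```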
@@ -327,8 +327,6 @@
      title: (번역중) Contribute new quantization method
    title: (번역중) 경량화 메소드
  - sections:
    - local: performance
      title: 성능 및 확장성
    - local: in_translation
      title: (번역중) Quantization
    - local: llm_optims
@@ -348,8 +346,6 @@
      title: CPU에서 훈련
    - local: perf_train_cpu_many
      title: 다중 CPU에서 훈련하기
    - local: perf_train_tpu_tf
      title: TensorFlow로 TPU에서 훈련하기
    - local: perf_train_special
      title: Apple 실리콘에서 PyTorch 학습
    - local: perf_hardware
@@ -363,12 +359,8 @@
    - local: perf_infer_gpu_one
      title: 하나의 GPU를 활용한 추론
    title: 추론 최적화하기
  - local: big_models
    title: 대형 모델을 인스턴스화
  - local: debugging
    title: 디버깅
  - local: tf_xla
    title: TensorFlow 모델을 위한 XLA 통합
  - local: in_translation
    title: (번역중) Optimize inference using `torch.compile()`
  title: (번역중) 성능 및 확장성
@@ -60,7 +60,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 Array = Any
 Dataset = datasets.arrow_dataset.Dataset
@@ -59,7 +59,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risk.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/flax/speech-recognition/requirements.txt")
 
@@ -55,7 +55,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 
 logger = logging.getLogger(__name__)
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 Array = Any
 Dataset = datasets.arrow_dataset.Dataset
@@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
 
 logger = logging.getLogger(__name__)
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
 
@@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")
 
@@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
 
@@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
 
@@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 
@@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")
 
@@ -56,7 +56,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")
 
@@ -61,7 +61,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")
 
@@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")
 
@@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")
 
@@ -69,7 +69,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
@@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 
@@ -72,7 +72,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
@@ -74,7 +74,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 
@@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
@@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
@@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
@@ -57,7 +57,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = logging.getLogger(__name__)
 
@@ -65,7 +65,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 # You should update this to your particular problem to have better documentation of `model_type`
@@ -59,7 +59,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/object-detection/requirements.txt")
 
@@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logging.basicConfig(level=logging.INFO)
 logger = get_logger(__name__)
@@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
@@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
@@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
@@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
@@ -45,7 +45,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
@@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")
 
@@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 
@@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
 
@@ -64,7 +64,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
 
@@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
 
@@ -67,7 +67,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
 
@@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
@@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
 
@@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
 
@@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 
@@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
 
@@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
 
@@ -67,7 +67,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
@@ -66,7 +66,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
 
@@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = get_logger(__name__)
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
@@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version(
     "datasets>=1.8.0", "To fix: pip install -r examples/tensorflow/contrastive-image-text/requirements.txt"
@@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
 
@@ -49,7 +49,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = logging.getLogger(__name__)
 
@@ -61,7 +61,7 @@ except (ModuleNotFoundError, ImportError):
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 logger = logging.getLogger(__name__)
 
@@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
 
 # region Checking dependencies
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
 
@@ -46,7 +46,7 @@ from transformers.utils import check_min_version, send_example_telemetry
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 task_to_keys = {
     "cola": ("sentence", None),
@@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
 
 # region Dependencies and constants
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.55.0.dev0")
+check_min_version("4.56.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
 
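All of the example-script hunks above are the same mechanical bump of the development-version gate. For reference, a sketch of what the gate does:

```python
from transformers.utils import check_min_version

# Raises if the installed transformers is older than the requested version,
# so main-branch examples fail fast on a stale install instead of erroring
# somewhere deep inside a training run.
check_min_version("4.56.0.dev0")
```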
setup.py
@@ -463,7 +463,7 @@ install_requires = [
 
 setup(
     name="transformers",
-    version="4.55.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.56.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
     author_email="transformers@huggingface.co",
     description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
@@ -18,7 +18,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.55.0.dev0"
+__version__ = "4.56.0.dev0"
 
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -452,6 +452,105 @@ def normalize(
     return image
 
 
+def unnormalize(
+    image: Union[np.ndarray, "torch.Tensor"],
+    mean: Union[float, Collection[float]],
+    std: Union[float, Collection[float]],
+    data_format: Optional[ChannelDimension] = None,
+    input_data_format: Optional[Union[str, ChannelDimension]] = None,
+):
+    """
+    Inverse of `normalize`:
+
+        image = image * std + mean
+
+    Accepts NumPy arrays or PyTorch tensors and mirrors `normalize`'s API,
+    but also handles 4D/5D by broadcasting along the channel axis and
+    collapsing leading batch dims. Defaults to NHWC output for visualization.
+    """
+    # type check
+    is_np = isinstance(image, np.ndarray)
+    is_torch = isinstance(image, torch.Tensor)
+    if not (is_np or is_torch):
+        raise TypeError("image must be a numpy array or a torch tensor")
+
+    # infer layout
+    if input_data_format is None:
+        input_data_format = infer_channel_dimension_format(image)
+
+    # cast policy (match normalize): cast only if not floating
+    if is_np:
+        if not np.issubdtype(image.dtype, np.floating):
+            image = image.astype(np.float32)
+    else:
+        if not image.is_floating_point():
+            image = image.float()
+
+    # channel axis and sizes
+    ch_axis = get_channel_dimension_axis(image, input_data_format=input_data_format)
+    num_channels = int(image.shape[ch_axis])
+
+    # normalize mean/std to per-channel vectors
+    def _as_seq(x, n):
+        if isinstance(x, Collection):
+            if len(x) != n:
+                raise ValueError(f"value must have {n} elements if it is an iterable, got {len(x)}")
+            return x
+        return [x] * n
+
+    mean_seq = _as_seq(mean, num_channels)
+    std_seq = _as_seq(std, num_channels)
+
+    # make broadcastable tensors/arrays shaped [1, ..., C (at ch_axis), ..., 1]
+    bshape = [1] * image.ndim
+    bshape[ch_axis] = num_channels
+
+    if is_np:
+        mean_arr = np.asarray(mean_seq, dtype=image.dtype).reshape(bshape)
+        std_arr = np.asarray(std_seq, dtype=image.dtype).reshape(bshape)
+        image = image * std_arr + mean_arr
+    else:
+        mean_arr = torch.as_tensor(mean_seq, dtype=image.dtype, device=image.device).view(bshape)
+        std_arr = torch.as_tensor(std_seq, dtype=image.dtype, device=image.device).view(bshape)
+        image = image * std_arr + mean_arr
+
+    # convert to numpy for plotting
+    if is_torch:
+        image = image.detach().cpu().numpy()
+        is_np = True  # from here on
+
+    # target layout: default to NHWC so downstream viz works out of the box
+    target_format = data_format or ChannelDimension.LAST
+
+    # collapse any leading batch dims into one, preserving (C,H,W) or (H,W,C)
+    if input_data_format == ChannelDimension.FIRST:
+        # layout: [*, C, H, W]
+        lead = int(np.prod(image.shape[: image.ndim - 3])) if image.ndim > 3 else 1
+        if image.ndim == 3:
+            c, h, w = image.shape
+            image = image.reshape(1, c, h, w)
+            lead = 1
+        else:
+            c, h, w = image.shape[-3:]
+            image = image.reshape(lead, c, h, w)
+        if target_format == ChannelDimension.LAST:
+            image = np.transpose(image, (0, 2, 3, 1))  # -> [N, H, W, C]
+    else:
+        # layout: [*, H, W, C]
+        lead = int(np.prod(image.shape[: image.ndim - 3])) if image.ndim > 3 else 1
+        if image.ndim == 3:
+            h, w, c = image.shape
+            image = image.reshape(1, h, w, c)
+            lead = 1
+        else:
+            h, w, c = image.shape[-3:]
+            image = image.reshape(lead, h, w, c)
+        if target_format == ChannelDimension.FIRST:
+            image = np.transpose(image, (0, 3, 1, 2))  # -> [N, C, H, W]
+
+    return image
+
+
 def center_crop(
     image: np.ndarray,
     size: tuple[int, int],
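A quick round-trip sketch of the new helper (values illustrative; `normalize` is the existing function whose tail appears as context above):

```python
import numpy as np
from transformers.image_transforms import normalize, unnormalize

mean, std = [0.5, 0.5, 0.5], [0.25, 0.25, 0.25]
image = np.random.rand(3, 224, 224).astype(np.float32)  # CHW float image

normalized = normalize(image, mean=mean, std=std)
restored = unnormalize(normalized, mean=mean, std=std)  # defaults to NHWC output

print(restored.shape)  # (1, 224, 224, 3): batch dim added, channels last
print(np.allclose(restored[0].transpose(2, 0, 1), image, atol=1e-5))  # True
```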
@@ -264,8 +264,8 @@ def routing_torch_dist(
 
     expt_data = compute_expt_data_torch(hist, n_local_experts, n_gates_pad)
 
-    hitted_experts = n_expts_act
-    return RoutingData(gate_scal, hist, n_local_experts, hitted_experts, expt_data), gather_indx, scatter_indx
+    hit_experts = n_expts_act
+    return RoutingData(gate_scal, hist, n_local_experts, hit_experts, expt_data), gather_indx, scatter_indx
 
 
 def mlp_forward(self, hidden_states):
@@ -280,7 +280,10 @@ def mlp_forward(self, hidden_states):
     batch_size = hidden_states.shape[0]
     hidden_states = hidden_states.reshape(-1, self.router.hidden_dim)
     router_logits = nn.functional.linear(hidden_states, self.router.weight, self.router.bias)
-    routing_data, gather_idx, scatter_idx = routing(router_logits, self.router.top_k)
+
+    with torch.cuda.device(router_logits.device):
+        routing_data, gather_idx, scatter_idx = routing(router_logits, self.router.top_k)
+
     routed_out = self.experts(hidden_states, routing_data, gather_idx, scatter_idx)
     routed_out = routed_out.reshape(batch_size, -1, self.router.hidden_dim)
     return routed_out, router_logits
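The second hunk pins the routing kernels to the logits' device instead of whatever GPU happens to be current. A minimal sketch of the pattern (assumes a CUDA machine):

```python
import torch

x = torch.randn(4, 8, device="cuda:0")

# Kernels launched inside this context target x's device, which matters for
# libraries such as Triton that otherwise launch on the current default GPU.
with torch.cuda.device(x.device):
    y = x * 2
```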
@@ -356,8 +356,8 @@ class Ernie4_5_MoeSparseMoeBlock(nn.Module):
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
         # Loop over all available experts in the model and perform the computation on each expert
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
 
@@ -167,8 +167,8 @@ class Ernie4_5_MoeSparseMoeBlock(nn.Module):
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
         # Loop over all available experts in the model and perform the computation on each expert
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
 
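Aside from the `expert_hitted` -> `expert_hit` rename, the dispatch trick is unchanged; a self-contained sketch of it (shapes illustrative):

```python
import torch

num_experts, top_k, num_tokens = 8, 2, 5
selected_experts = torch.randint(0, num_experts, (num_tokens, top_k))

# One-hot over experts, permuted to [num_experts, top_k, num_tokens].
expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=num_experts).permute(2, 1, 0)

# Visit only the experts that actually received at least one token.
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
for expert_idx in expert_hit:
    idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
    # idx: which top-k slot picked this expert; top_x: which token rows to gather
```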
@@ -141,7 +141,7 @@ class Glm4vImageProcessor(BaseImageProcessor):
         super().__init__(**kwargs)
         if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
             raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
-        else:
+        elif size is None:
             size = {"shortest_edge": 112 * 112, "longest_edge": 28 * 28 * 15000}
         self.size = size
 
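The bug fixed here: with a bare `else`, a valid user-supplied `size` fell through and was overwritten by the default; `elif size is None` only fills the default when nothing was passed. A stripped-down repro of the corrected logic:

```python
def resolve_size(size=None):
    if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
        raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
    elif size is None:  # previously a bare `else`, which also ran for valid sizes
        size = {"shortest_edge": 112 * 112, "longest_edge": 28 * 28 * 15000}
    return size

# A valid user value now survives instead of being replaced by the default.
print(resolve_size({"shortest_edge": 256, "longest_edge": 1024}))
```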
@@ -75,7 +75,7 @@ class GptOssExperts(nn.Module):
 
     def forward(self, hidden_states: torch.Tensor, router_indices=None, routing_weights=None) -> torch.Tensor:
         """
-        When training is is more efficient to just loop over the experts and compute the output for each expert
+        When training it is more efficient to just loop over the experts and compute the output for each expert
         as otherwise the memory would explode.
 
         For inference we can sacrifice some memory and compute the output for all experts at once. By repeating the inputs.
@@ -97,8 +97,8 @@ class GptOssExperts(nn.Module):
             expert_mask = expert_mask.permute(2, 1, 0)
             # we sum on the top_k and on the sequence lenght to get which experts
             # are hit this time around
-            expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-            for expert_idx in expert_hitted[:]:
+            expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+            for expert_idx in expert_hit[:]:
                 with torch.no_grad():
                     _, token_idx = torch.where(expert_mask[expert_idx[0]])
                 current_state = hidden_states[token_idx]
@@ -73,7 +73,7 @@ class GptOssExperts(nn.Module):
 
     def forward(self, hidden_states: torch.Tensor, router_indices=None, routing_weights=None) -> torch.Tensor:
         """
-        When training is is more efficient to just loop over the experts and compute the output for each expert
+        When training it is more efficient to just loop over the experts and compute the output for each expert
         as otherwise the memory would explode.
 
        For inference we can sacrifice some memory and compute the output for all experts at once. By repeating the inputs.
@@ -95,8 +95,8 @@ class GptOssExperts(nn.Module):
             expert_mask = expert_mask.permute(2, 1, 0)
             # we sum on the top_k and on the sequence lenght to get which experts
             # are hit this time around
-            expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-            for expert_idx in expert_hitted[:]:
+            expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+            for expert_idx in expert_hit[:]:
                 with torch.no_grad():
                     _, token_idx = torch.where(expert_mask[expert_idx[0]])
                 current_state = hidden_states[token_idx]
@@ -40,7 +40,7 @@ if is_torch_flex_attn_available():
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.jetmoe.modeling_jetmoe.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
@@ -119,7 +119,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
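The guard matters because `torch.device.index` is `None` for a bare device such as `cpu`, and `int(None)` raises a `TypeError`; a minimal illustration:

```python
import torch

device = torch.device("cpu")
print(device.index)  # None -> int(device.index) would raise TypeError

device_index = device.index if device.index is not None else 0
rank = 4 * int(device_index)  # 4 stands in for routing_weights.shape[1]
```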
@@ -1647,7 +1647,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -918,7 +918,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -67,7 +67,7 @@ is_fast_path_available = all(
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func with gate->router
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func with gate->router
 def load_balancing_loss_func(
     router_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
@@ -148,7 +148,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -50,7 +50,7 @@ if is_flash_attn_available():
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
@@ -129,7 +129,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -465,8 +465,8 @@ class MiniMaxSparseMoeBlock(nn.Module):
         # this will be used to easily index which expert is going to be sollicitated
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
             # Index the correct hidden states and compute the expert hidden state for
@@ -121,8 +121,8 @@ class MixtralSparseMoeBlock(nn.Module):
         # this will be used to easily index which expert is going to be sollicitated
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
             # Index the correct hidden states and compute the expert hidden state for
@@ -201,8 +201,8 @@ class MixtralSparseMoeBlock(nn.Module):
         # this will be used to easily index which expert is going to be sollicitated
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
             # Index the correct hidden states and compute the expert hidden state for
@@ -39,7 +39,7 @@ if is_flash_attn_available():
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
@@ -118,7 +118,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -55,7 +55,7 @@ _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_m
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
@@ -134,7 +134,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -59,7 +59,6 @@ if is_torch_flex_attn_available():
 logger = logging.get_logger(__name__)
 
-
 # Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
@@ -138,7 +137,8 @@ def load_balancing_loss_func(
             router_per_expert_attention_mask, dim=0
         )
 
-    rank = routing_weights.shape[1] * int(routing_weights.device.index)
+    device_index = routing_weights.device.index if routing_weights.device.index is not None else 0
+    rank = routing_weights.shape[1] * int(device_index)
     overall_loss = torch.sum(
         tokens_per_expert[:, rank : rank + routing_weights.shape[1]] * router_prob_per_expert.unsqueeze(0)
     )
@@ -621,8 +621,8 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
         # Loop over all available experts in the model and perform the computation on each expert
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
 
@@ -244,8 +244,8 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
         # Loop over all available experts in the model and perform the computation on each expert
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
 
@@ -100,8 +100,8 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
         expert_mask = torch.nn.functional.one_hot(selected_experts, num_classes=self.num_experts).permute(2, 1, 0)
 
         # Loop over all available experts in the model and perform the computation on each expert
-        expert_hitted = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
-        for expert_idx in expert_hitted:
+        expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
+        for expert_idx in expert_hit:
             expert_layer = self.experts[expert_idx]
             idx, top_x = torch.where(expert_mask[expert_idx].squeeze(0))
 
@@ -56,33 +56,44 @@ class Mxfp4HfQuantizer(HfQuantizer):
                 "Using mxfp4 quantization requires torch"
                 "Please install the latest version of torch ( pip install --upgrade torch )"
             )
 
-        if self.quantization_config.dequantize:
-            return
-
         if not torch.cuda.is_available():
             raise RuntimeError("Using MXFP4 quantized models requires a GPU")
 
         if not is_accelerate_available():
             raise ImportError("Using mxfp4 requires Accelerate: `pip install accelerate`")
 
+        if self.quantization_config.dequantize:
+            return
+
         compute_capability = torch.cuda.get_device_capability()
         major, minor = compute_capability
+        gpu_is_supported = compute_capability >= (7, 5)
+        kernels_available = is_triton_available("3.4.0") and is_triton_kernels_availalble()
 
-        if not is_triton_available("3.4.0") or not is_triton_kernels_availalble():
-            if self.pre_quantized and not self.quantization_config.dequantize:
-                logger.warning_once(
-                    "MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed, we will default to dequantizing the model to bf16"
-                )
-                self.quantization_config.dequantize = True
-                return
-            else:
-                # we can't quantize the model in this case so we raise an error
-                raise ValueError("MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed")
+        if self.pre_quantized:
+            # On unsupported GPUs or without kernels, we will dequantize the model to bf16
+            if not gpu_is_supported:
+                logger.warning_once(
+                    "MXFP4 quantization is only supported on GPUs with compute capability >= 7.5 (e.g T4, A100, L4, H100, or B200). "
+                    "We will default to dequantizing the model to bf16."
+                )
+                self.quantization_config.dequantize = True
+                return
 
-        if major < 9:
-            # we can't quantize the model in this case so we raise an error
-            raise ValueError(
-                "MXFP4 quantized models is only supported on GPUs with compute capability >= 9.0 (e.g H100, or B100)"
-            )
+            if not kernels_available:
+                logger.warning_once(
+                    "MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed, we will default to dequantizing the model to bf16"
+                )
+                self.quantization_config.dequantize = True
+                return
+        elif not gpu_is_supported:
+            # we can't quantize the model in this case so we raise an error
+            raise ValueError(
+                "MXFP4 quantization is only supported on GPUs with compute capability >= 7.5 (e.g T4, A100, L4, H100, or B200)"
+            )
+        elif not kernels_available:
+            # we can't quantize the model in this case so we raise an error
+            raise ValueError("MXFP4 quantization requires triton >= 3.4.0 and triton_kernels installed")
 
         device_map = kwargs.get("device_map", None)
         if device_map is None:
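With the relaxed checks, pre-quantized checkpoints now fall back to bf16 dequantization on unsupported setups instead of erroring. Forcing that behavior explicitly looks like this (checkpoint id is an assumption):

```python
from transformers import AutoModelForCausalLM, Mxfp4Config

# Explicitly dequantize MXFP4 weights to bf16 at load time, e.g. on hardware
# without triton >= 3.4.0 or the triton_kernels package.
config = Mxfp4Config(dequantize=True)
model = AutoModelForCausalLM.from_pretrained(
    "openai/gpt-oss-20b",  # assumed checkpoint id
    quantization_config=config,
    device_map="auto",
)
```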
373
src/transformers/utils/processor_visualizer_utils.py
Normal file
373
src/transformers/utils/processor_visualizer_utils.py
Normal file
@ -0,0 +1,373 @@
import re
from typing import Optional, Union

import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
from PIL import Image

from ..image_transforms import convert_to_rgb, to_pil_image, unnormalize
from ..models.auto import AutoConfig, AutoProcessor


# Architectures known to fail with this util; raise immediately for these:

INCOMPATIBLE_MODELS = [
    "bit",
    "colpali",
    "colqwen2",
    "convnext",
    "d_fine",
    "data2vec",
    "efficientloftr",
    "efficientnet",
    "fuyu",
    "gemma3",
    "glm4v",
    "glpn",
    "hgnet_v2",
    "hiera",
    "internvl",
    "janus",
    "layoutlmv3",
    "levit",
    "lightglue",
    "llama4",
    "mistral3",
    "mllama",
    "mobilevit",
    "mobilevitv2",
    "musicgen",
    "musicgen_melody",
    "oneformer",
    "perceiver",
    "perception_lm",
    "phi4_multimodal",
    "qwen2_5_omni",
    "qwen2_5_vl",
    "qwen2_vl",
    "regnet",
    "resnet",
    "superglue",
    "superpoint",
    "swin2sr",
    "timm_wrapper",
    "tvp",
    "udop",
    "vitmatte",
    "vitpose",
    "vjepa2",
    "whisper",
]


DEFAULT_IMAGE_URL = (
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/hf-logo-224x224.png"
)
def _looks_like_global(tile: np.ndarray, base: Image.Image, *, mae_tol: float = 0.3) -> bool:
    """
    Very simple visualizer heuristic: treat `tile` as a downscaled copy of the full `base`
    image if the mean absolute error between the tile and the base (resized to the tile's
    resolution) is below `mae_tol`.
    """
    base_r = base.convert("RGB").resize(tile.shape[:2][::-1], Image.BILINEAR)
    base_np = np.asarray(base_r).astype(np.float32) / 255.0

    tile_f32 = tile.astype(np.float32)
    if tile_f32.max() > 1.5:
        # tile is in 0-255 range; bring it to 0-1 before comparing
        tile_f32 /= 255.0

    mae = np.abs(tile_f32 - base_np).mean()
    return mae < mae_tol
class ImageVisualizer:
    def __init__(self, repo_id: str):
        self.processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=False)
        self.config = AutoConfig.from_pretrained(repo_id, trust_remote_code=False)

        if hasattr(self.processor, "image_processor"):
            image_processor = self.processor.image_processor
        elif hasattr(self.processor, "image_mean"):
            image_processor = self.processor  # weak test, but works most of the time
        else:
            raise ValueError(f"No image processor found for {repo_id}.")

        self.channel_means = getattr(image_processor, "image_mean", [0.485, 0.456, 0.406])
        self.channel_stds = getattr(image_processor, "image_std", [0.229, 0.224, 0.225])
        if hasattr(self.processor, "image_token"):
            self.image_token_marker = self.processor.image_token
        elif hasattr(self.processor, "image_token_id"):
            self.image_token_marker = self.processor.decode(self.processor.image_token_id)
        else:
            self.image_token_marker = "<image>"

        self.default_prompt = f"{self.image_token_marker} How does it look?"

        self.vision_config = getattr(self.config, "vision_config", None)
        self.patch_size = getattr(self.vision_config, "patch_size", getattr(image_processor, "patch_size", 14))
        self.merge_size = getattr(image_processor, "merge_size", 1)

    def _pixel_values_as_tensor(
        self, pixel_values: Union[torch.Tensor, np.ndarray, list[np.ndarray], list[torch.Tensor]]
    ):
        """
        Normalize input to a 4D tensor with shape (batch, channels, height, width).
        Supports input of shape:
        - (B, C, H, W)
        - (B, N, C, H, W) -> flattened to (B*N, C, H, W)
        - (C, H, W) -> expanded to (1, C, H, W)
        - list/tuple of arrays or tensors
        """
        if isinstance(pixel_values, (list, tuple)):
            tensor_list = [pv if isinstance(pv, torch.Tensor) else torch.tensor(pv) for pv in pixel_values]
            pixel_values = torch.stack(tensor_list, dim=0)

        if not isinstance(pixel_values, torch.Tensor):
            pixel_values = torch.tensor(pixel_values)

        if pixel_values.ndim == 5:
            batch_size, num_images, num_channels, height, width = pixel_values.shape
            pixel_values = pixel_values.view(batch_size * num_images, num_channels, height, width)
        elif pixel_values.ndim == 4:
            pass
        elif pixel_values.ndim == 3:
            pixel_values = pixel_values.unsqueeze(0)
        else:
            raise ValueError(f"Unexpected pixel tensor shape {pixel_values.shape}")

        return pixel_values
    def _display_single_image(self, image_array: np.ndarray, show_patch_grid: bool, figsize=(7, 7)):
        plt.figure(figsize=figsize)
        plt.imshow(image_array)
        plt.xticks([])
        plt.yticks([])

        # computed before the branch so the caption also works when the grid is off
        height, width = image_array.shape[:2]
        if show_patch_grid:
            step = max(1, min(height, width) // self.patch_size)
            for x_pos in range(0, width, step):
                plt.axvline(x_pos, color="red", linewidth=0.5)
            for y_pos in range(0, height, step):
                plt.axhline(y_pos, color="red", linewidth=0.5)

        caption = f"{width}×{height} | mean={', '.join(f'{m:.3f}' for m in self.channel_means)} | std={', '.join(f'{s:.3f}' for s in self.channel_stds)}"
        plt.tight_layout()
        plt.figtext(0.5, -0.02, caption, ha="center", va="top", fontsize=12)
        plt.show()
    def _display_tiled_images(
        self,
        tiles_array: np.ndarray,
        source_image: Image.Image,
        rows: Optional[int] = None,
        cols: Optional[int] = None,
        aspect_ratio: float = 1.0,
        add_grid: bool = True,
        figsize=(7, 7),
    ):
        """
        Display a grid of image tiles. Attempts to detect and preserve the original/global image tile,
        which is then shown separately at the end.
        """
        num_tiles = tiles_array.shape[0]

        original_tile_index = None
        saved_original_tile = None

        for idx in (0, num_tiles - 1):
            if _looks_like_global(tiles_array[idx], source_image):
                original_tile_index = idx
                break

        if original_tile_index is not None:
            saved_original_tile = tiles_array[original_tile_index]
            tiles_array = np.delete(tiles_array, original_tile_index, axis=0)
            num_tiles -= 1

        # Infer grid if not specified
        grid_rows, grid_cols = rows, cols
        if grid_rows is None or grid_cols is None:
            if aspect_ratio >= 1:
                guessed_cols = int(np.ceil(np.sqrt(num_tiles * aspect_ratio)))
                guessed_rows = int(np.ceil(num_tiles / max(guessed_cols, 1)))
            else:
                guessed_rows = int(np.ceil(np.sqrt(num_tiles / max(aspect_ratio, 1e-8))))
                guessed_cols = int(np.ceil(num_tiles / max(guessed_rows, 1)))
            grid_rows = grid_rows if grid_rows is not None else guessed_rows
            grid_cols = grid_cols if grid_cols is not None else guessed_cols

        fig, axes = plt.subplots(grid_rows, grid_cols, figsize=figsize, squeeze=False)
        tile_index = 0
        for row_idx in range(grid_rows):
            for col_idx in range(grid_cols):
                ax = axes[row_idx, col_idx]
                if tile_index < num_tiles:
                    tile_image = tiles_array[tile_index]
                    ax.imshow(tile_image)
                    ax.set_xticks([])
                    ax.set_yticks([])

                    if add_grid:
                        height, width = tile_image.shape[:2]
                        step = max(1, min(height, width) // self.patch_size)
                        for x_pos in range(0, width, step):
                            ax.axvline(x_pos, color="red", linewidth=0.5)
                        for y_pos in range(0, height, step):
                            ax.axhline(y_pos, color="red", linewidth=0.5)
                else:
                    ax.axis("off")
                tile_index += 1

        unique = sorted({f"{t.shape[1]}×{t.shape[0]}" for t in tiles_array})
        sizes = ", ".join(unique)
        caption = f"{tiles_array.shape[0]} patches | {sizes} | mean={', '.join(f'{m:.3f}' for m in self.channel_means)} | std={', '.join(f'{s:.3f}' for s in self.channel_stds)}"
        plt.tight_layout()
        fig.text(0.5, 0.02, caption, ha="center", va="bottom", fontsize=12)
        plt.show()

        if saved_original_tile is not None:
            fig2, ax2 = plt.subplots(figsize=figsize)
            ax2.imshow(saved_original_tile)
            ax2.set_xticks([])
            ax2.set_yticks([])
            ax2.set_aspect("equal", adjustable="box")
            fig2.subplots_adjust(left=0, right=1, top=1, bottom=0)  # no clipping
            h0, w0 = saved_original_tile.shape[:2]
            caption = f"{w0}×{h0} | mean={', '.join(f'{m:.3f}' for m in self.channel_means)} | std={', '.join(f'{s:.3f}' for s in self.channel_stds)}"
            fig2.text(0.5, 0.02, caption, ha="center", va="bottom", fontsize=12)
            plt.show()
    def default_message(self, full_output: bool = False) -> str:
        """
        Build a single formatted prompt string using the processor's chat template.
        Contains one image (HF logo) and one user text message.
        If available, adds the generation prompt as well.
        Falls back to a minimal '<image>' string if no template is available.
        """
        # ensure this is a multimodal processor with image + tokenizer
        if not (
            hasattr(self.processor, "attributes")
            and "image_processor" in self.processor.attributes
            and "tokenizer" in self.processor.attributes
        ):
            raise RuntimeError(
                "Processor does not expose both 'image_processor' and 'tokenizer'; cannot build multimodal example."
            )

        conversation = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/hf-logo-224x224.png",
                    },
                    {"type": "text", "text": "Please describe this image."},
                ],
            }
        ]

        try:
            print("For a 224x224 RGB png image: \n")
            decoded_message = self.processor.batch_decode(
                self.processor.apply_chat_template(
                    conversation,
                    add_generation_prompt=True,
                    tokenize=True,
                    return_dict=False,
                    truncation=False,
                ),
                skip_special_tokens=False,
            )[0]

            image_token_string = getattr(self.processor, "image_token", "<image>")
            token_escaped = re.escape(image_token_string)
            image_token_run_pattern = re.compile(rf"(?:{token_escaped})(?:\s*{token_escaped}){{2,}}")

            def compress_image_token_run(match: re.Match) -> str:
                n_tokens = match.group(0).count(image_token_string)
                return f"{image_token_string}[...{n_tokens} tokens...]{image_token_string}"

            if full_output:
                return decoded_message
            else:
                return image_token_run_pattern.sub(compress_image_token_run, decoded_message)

        except ValueError:
            image_token_string = getattr(
                self.processor,
                "image_token",
                getattr(getattr(self.processor, "tokenizer", None), "image_token", "<image>"),
            )
            return f"{image_token_string} Please describe this image."
    def visualize(
        self,
        images: Optional[Union[Image.Image, np.ndarray, str, list[Union[Image.Image, np.ndarray, str]]]] = None,
        rows: Optional[int] = None,
        cols: Optional[int] = None,
        add_grid: bool = True,
        figsize=(12, 12),
    ):
        """
        Visualize the model-processed image(s). Only single images are supported.
        If the processor returns multiple tiles, display them in a grid with optional patch grid overlay.
        """
        if images is None:
            images = Image.open(requests.get(DEFAULT_IMAGE_URL, stream=True).raw)

        if not isinstance(images, list):
            images = [images]
        elif len(images) > 1:
            raise ValueError("You passed a list of several images. Only single images are accepted by the visualizer.")

        pil_images = [convert_to_rgb(to_pil_image(x)) for x in images]
        img_width, img_height = pil_images[0].size
        aspect_ratio = img_width / max(img_height, 1)

        processed_inputs = self.processor(images=pil_images, text=self.default_prompt, return_tensors="pt")
        pixel_values = processed_inputs["pixel_values"]
        unnormalized = unnormalize(pixel_values, mean=self.channel_means, std=self.channel_stds)
        if unnormalized.ndim == 3 or unnormalized.shape[0] == 1:
            self._display_single_image(
                unnormalized[0] if unnormalized.ndim == 4 else unnormalized,
                show_patch_grid=add_grid,
                figsize=figsize,
            )
            return
        elif unnormalized.ndim != 4:
            raise ValueError(f"Unsupported shape after unnormalization: {unnormalized.shape}")

        num_tiles = unnormalized.shape[0]

        if rows is None or cols is None:
            tile_h, tile_w = unnormalized.shape[1:3]
            tile_aspect = tile_w / tile_h if tile_h > 0 else 1.0
            target_aspect = aspect_ratio / tile_aspect

            best_rows, best_cols = 1, num_tiles
            min_diff = float("inf")
            for r in range(1, num_tiles + 1):
                c = int(np.ceil(num_tiles / r))
                diff = abs((c / r) - target_aspect)
                if diff < min_diff:
                    min_diff = diff
                    best_rows, best_cols = r, c

            rows = best_rows
            cols = best_cols
        self._display_tiled_images(
            unnormalized,
            pil_images[0],
            rows=rows,
            cols=cols,
            aspect_ratio=aspect_ratio,
            add_grid=add_grid,
            figsize=figsize,
        )
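A hedged usage sketch of the new visualizer (the checkpoint name is an assumption, not part of this diff; any repo whose processor exposes an image processor and a tokenizer should work):

from transformers.utils.processor_visualizer_utils import ImageVisualizer

viz = ImageVisualizer("llava-hf/llava-1.5-7b-hf")  # assumed multimodal checkpoint
print(viz.default_message())  # chat-template prompt with long image-token runs compressed
viz.visualize(add_grid=True)  # downloads the default HF-logo image and plots the processed tiles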
@ -107,18 +107,31 @@ class Mxfp4QuantizerTest(unittest.TestCase):
def test_quantizer_validation_low_compute_capability(self):
"""Test quantizer validation with low compute capability"""
with patch("torch.cuda.get_device_capability", return_value=(8, 0)):
with patch("torch.cuda.get_device_capability", return_value=(7, 0)):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer

config = Mxfp4Config()
quantizer = Mxfp4HfQuantizer(config)
quantizer.pre_quantized = False

with self.assertRaises(ValueError):
quantizer.validate_environment()

def test_quantizer_validation_low_compute_capability_with_prequantized(self):
"""Test quantizer validation with low compute capability"""
with patch("torch.cuda.get_device_capability", return_value=(7, 0)):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer

config = Mxfp4Config()
quantizer = Mxfp4HfQuantizer(config)

with self.assertRaises(ValueError):
quantizer.validate_environment()
# Should automatically set dequantize=True and warn
quantizer.validate_environment()
self.assertTrue(quantizer.quantization_config.dequantize)

def test_quantizer_validation_low_compute_capability_with_dequantize(self):
"""Test quantizer validation with low compute capability but dequantize enabled"""
with patch("torch.cuda.get_device_capability", return_value=(8, 0)):
with patch("torch.cuda.get_device_capability", return_value=(7, 0)):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer

config = Mxfp4Config(dequantize=True)
@ -131,6 +144,52 @@ class Mxfp4QuantizerTest(unittest.TestCase):
if "compute capability" in str(e):
self.fail("Should not raise compute capability error when dequantize=True")

def test_quantizer_validation_dequantize_on_cpu(self):
"""Test quantizer validation with dequantize enabled on CPU-only environment"""
with patch("torch.cuda.is_available", return_value=False):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer

config = Mxfp4Config(dequantize=True)
quantizer = Mxfp4HfQuantizer(config)

# Should not raise error when dequantize=True even without CUDA
try:
quantizer.validate_environment()
except RuntimeError as e:
if "requires a GPU" in str(e):
self.fail("Should not raise GPU requirement error when dequantize=True on CPU")

def test_quantizer_validation_order_dequantize_before_cuda_check(self):
"""Test that dequantize check happens before CUDA availability check"""
# Mock both torch.cuda.is_available and is_accelerate_available to return False
with (
patch("torch.cuda.is_available", return_value=False),
patch(
"transformers.quantizers.quantizer_mxfp4.is_accelerate_available",
return_value=False,
),
):
from transformers.quantizers.quantizer_mxfp4 import Mxfp4HfQuantizer

# Test with dequantize=True - should pass even without CUDA and accelerate
config = Mxfp4Config(dequantize=True)
quantizer = Mxfp4HfQuantizer(config)

# This should not raise any error because dequantize check comes first
try:
quantizer.validate_environment()
except (RuntimeError, ImportError) as e:
if "requires a GPU" in str(e) or "requires Accelerate" in str(e):
self.fail(f"Should not raise error when dequantize=True: {e}")

# Test with dequantize=False - should still fail due to missing CUDA
config = Mxfp4Config(dequantize=False)
quantizer = Mxfp4HfQuantizer(config)

with self.assertRaises(RuntimeError) as context:
quantizer.validate_environment()
self.assertIn("requires a GPU", str(context.exception))

def test_quantizer_validation_missing_triton(self):
"""Test quantizer validation when triton is not available"""
with (
@ -171,6 +171,12 @@ if __name__ == "__main__":
print(f"start_commit: {args.start_commit}")
print(f"end_commit: {args.end_commit}")

# `get_commit_info` uses `requests.get()` to request info via `api.github.com` without using a token.
# If there are many new failed tests in a workflow run, this script may fail at some point with `KeyError` at
# `pr_number = pr_info_for_commit[0]["number"]` due to the rate limit.
# Let's cache the commit info and reuse it whenever possible.
commit_info_cache = {}

if len({args.test is None, args.file is None}) != 2:
raise ValueError("Exactly one argument `test` or `file` must be specified.")

@ -191,7 +197,14 @@
for test in failed_tests:
commit = find_bad_commit(target_test=test, start_commit=args.start_commit, end_commit=args.end_commit)
info = {"test": test, "commit": commit}
info.update(get_commit_info(commit))

if commit in commit_info_cache:
commit_info = commit_info_cache[commit]
else:
commit_info = get_commit_info(commit)
commit_info_cache[commit] = commit_info

info.update(commit_info)
failed_tests_with_bad_commits.append(info)

# If no single-gpu test failures, remove the key
@ -345,6 +345,8 @@ SPECIAL_CASES_TO_ALLOW.update(
"IdeficsConfig": True,
"IdeficsVisionConfig": True,
"IdeficsPerceiverConfig": True,
# TODO: @Arthur/Joao (`hidden_act` unused)
"GptOssConfig": True,
}
)