Compare commits

..

1 Commits

Author SHA1 Message Date
8e3fbca4cd try 2025-02-14 16:02:47 +01:00
489 changed files with 11529 additions and 18112 deletions

View File

@ -31,14 +31,6 @@ jobs:
parallelism: 1
steps:
- checkout
- run: if [[ "$CIRCLE_PULL_REQUEST" == "" && "$CIRCLE_BRANCH" != "main" && "$CIRCLE_BRANCH" != *-release ]]; then echo "Not a PR, not the main branch and not a release branch, skip test!"; circleci-agent step halt; fi
- run: 'curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/${CIRCLE_PULL_REQUEST##*/} >> github.txt'
- run: cat github.txt
- run: (python3 -c 'import json; from datetime import datetime; fp = open("github.txt"); data = json.load(fp); fp.close(); f = "%Y-%m-%dT%H:%M:%SZ"; created = datetime.strptime(data["created_at"], f); updated = datetime.strptime(data["updated_at"], f); s = (updated - created).total_seconds(); print(int(s))' || true) > elapsed.txt
- run: if [ "$(cat elapsed.txt)" == "" ]; then echo 60 > elapsed.txt; fi
- run: cat elapsed.txt
- run: if [ "$(cat elapsed.txt)" -lt "30" ]; then echo "PR is just opened, wait some actions from GitHub"; sleep 30; fi
- run: 'if grep -q "\"draft\": true," github.txt; then echo "draft mode, skip test!"; circleci-agent step halt; fi'
- run: uv pip install -U -e .
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
- run: mkdir -p test_preparation
@ -168,7 +160,7 @@ jobs:
environment:
TRANSFORMERS_IS_CI: yes
PYTEST_TIMEOUT: 120
parallelism: 1
parallelism: 4
steps:
- checkout
- run: uv pip install -e ".[quality]"
@ -177,19 +169,19 @@ jobs:
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: python utils/check_copies.py
- run: python utils/check_modular_conversion.py --num_workers 4
- run: python utils/check_table.py
- run: python utils/check_dummies.py
- run: python utils/check_repo.py
- run: python utils/check_inits.py
- run: python utils/check_config_docstrings.py
- run: python utils/check_config_attributes.py
- run: python utils/check_doctest_list.py
- run: make deps_table_check_updated
- run: python utils/update_metadata.py --check-only
- run: python utils/check_docstrings.py
- run: python utils/check_support_list.py
- run: if [ "$CIRCLE_NODE_INDEX" == "0" ]; then python utils/check_copies.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "1" ]; then python utils/check_modular_conversion.py --num_workers 4; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "3" ]; then python utils/check_table.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "0" ]; then python utils/check_dummies.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "2" ]; then python utils/check_repo.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "2" ]; then python utils/check_inits.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "2" ]; then python utils/check_config_docstrings.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "2" ]; then python utils/check_config_attributes.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "0" ]; then python utils/check_doctest_list.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "0" ]; then make deps_table_check_updated; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "3" ]; then python utils/update_metadata.py --check-only; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "3" ]; then python utils/check_docstrings.py; fi
- run: if [ "$CIRCLE_NODE_INDEX" == "0" ]; then python utils/check_support_list.py; fi
workflows:
version: 2

View File

@ -28,6 +28,8 @@ COMMON_ENV_VARIABLES = {
"TRANSFORMERS_IS_CI": True,
"PYTEST_TIMEOUT": 120,
"RUN_PIPELINE_TESTS": False,
"RUN_PT_TF_CROSS_TESTS": False,
"RUN_PT_FLAX_CROSS_TESTS": False,
}
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
@ -175,6 +177,23 @@ class CircleCIJob:
# JOBS
torch_and_tf_job = CircleCIJob(
"torch_and_tf",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
additional_env={"RUN_PT_TF_CROSS_TESTS": True},
marker="is_pt_tf_cross_test",
pytest_options={"rA": None, "durations": 0},
)
torch_and_flax_job = CircleCIJob(
"torch_and_flax",
additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-jax-light"}],
marker="is_pt_flax_cross_test",
pytest_options={"rA": None, "durations": 0},
)
torch_job = CircleCIJob(
"torch",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
@ -334,7 +353,7 @@ doc_test_job = CircleCIJob(
pytest_num_workers=1,
)
REGULAR_TESTS = [torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
REPO_UTIL_TESTS = [repo_utils_job]

View File

@ -64,7 +64,7 @@ jobs:
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark/benchmarks_entrypoint.py "$BRANCH_NAME" "$commit_id" "$commit_msg"
python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs
@ -73,4 +73,3 @@ jobs:
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
PGUSER: transformers_benchmarks
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

View File

@ -26,7 +26,7 @@ jobs:
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "jax-light", "examples-torch", "examples-tf"]
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
continue-on-error: true
steps:
@ -34,11 +34,11 @@ jobs:
name: Set tag
run: |
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
echo "setting it to DEV!"
else
echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV"
fi
-
name: Set up Docker Buildx

View File

@ -1,22 +0,0 @@
name: Change PR to draft
on:
pull_request_target:
types: [opened, reopened]
jobs:
convert_pr_to_draft:
runs-on: ubuntu-22.04
name: Convert PR to draft
if: github.event.pull_request.draft == false
steps:
- name: Convert PR to draft
shell: bash
env:
PR_NUMBER: ${{ github.event.number }}
GH_TOKEN: ${{ github.token }}
REPO: ${{ github.repository }}
run: |
echo $PR_NUMBER
gh pr ready $PR_NUMBER --repo $REPO --undo
gh pr comment $PR_NUMBER --repo $REPO --body "Hi 👋, thank you for opening this pull request! The pull request is converted to draft by default. When it is ready for review, please click the \`Ready for review\` button (at the bottom of the PR page)."

View File

@ -22,6 +22,7 @@ env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1

View File

@ -30,6 +30,7 @@ env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:

View File

@ -30,6 +30,7 @@ env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:

View File

@ -1,68 +0,0 @@
# Used to notify core maintainers about new model PR being merged
name: New model PR merged notification
on:
push:
branches:
- main
paths:
- 'src/transformers/models/*/modeling_*'
jobs:
notify_new_model:
name: Notify new model
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check new model
shell: bash
run: |
python -m pip install gitpython
python -c 'from utils.pr_slow_ci_models import get_new_model; new_model = get_new_model(diff_with_last_commit=True); print(new_model)' | tee output.txt
echo "NEW_MODEL=$(tail -n 1 output.txt)" >> $GITHUB_ENV
echo "COMMIT_SHA=$(git log -1 --format=%H)" >> $GITHUB_ENV
- name: print commit sha
if: ${{ env.NEW_MODEL != ''}}
shell: bash
run: |
echo "$COMMIT_SHA"
- name: print new model
if: ${{ env.NEW_MODEL != ''}}
shell: bash
run: |
echo "$NEW_MODEL"
- name: Notify
if: ${{ env.NEW_MODEL != ''}}
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
with:
# Slack channel id, channel name, or user id to post message.
# See also: https://api.slack.com/methods/chat.postMessage#channels
channel-id: transformers-new-model-notification
# For posting a rich message using Block Kit
payload: |
{
"blocks": [
{
"type": "header",
"text": {
"type": "plain_text",
"text": "New model!",
"emoji": true
}
},
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh"
}
}
]
}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

View File

@ -7,13 +7,14 @@ on:
env:
OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
jobs:
get_modified_models:
@ -24,13 +25,13 @@ jobs:
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
with:
files: src/transformers/models/**
- name: Run step if only the files listed above change
if: steps.changed-files.outputs.any_changed == 'true'
id: set-matrix
@ -59,41 +60,41 @@ jobs:
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
strategy:
fail-fast: false
matrix:
matrix:
model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Install locally transformers & other libs
run: |
apt install sudo
sudo -H pip install --upgrade pip
sudo -H pip uninstall -y transformers
sudo -H pip install -U -e ".[testing]"
sudo -H pip uninstall -y transformers
sudo -H pip install -U -e ".[testing]"
MAX_JOBS=4 pip install flash-attn --no-build-isolation
pip install bitsandbytes
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Show installed libraries and their versions
run: pip freeze
- name: Run FA2 tests
id: run_fa2_tests
run:
pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.model-name }}_fa2_tests
path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
@ -102,13 +103,13 @@ jobs:
title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
status: ${{ steps.run_fa2_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
- name: Run integration tests
id: run_integration_tests
if: always()
run:
pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
@ -118,7 +119,7 @@ jobs:
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}

View File

@ -22,6 +22,7 @@ env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:

View File

@ -14,6 +14,7 @@ env:
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 60
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
jobs:

View File

@ -24,6 +24,7 @@ env:
MKL_NUM_THREADS: 8
PYTEST_TIMEOUT: 60
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
@ -292,7 +293,7 @@ jobs:
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /transformers
run: |
@ -405,7 +406,7 @@ jobs:
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /workspace/transformers
run: |
@ -515,7 +516,7 @@ jobs:
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Update clone using environment variables
working-directory: /workspace/transformers
run: |
@ -647,6 +648,6 @@ jobs:
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install huggingface_hub
pip install slack_sdk
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"

View File

@ -15,7 +15,7 @@ jobs:
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_models_gpu
slack_report_channel: "#amd-hf-ci"
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
@ -26,7 +26,7 @@ jobs:
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#amd-hf-ci"
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
@ -37,7 +37,7 @@ jobs:
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#amd-hf-ci"
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
@ -48,7 +48,7 @@ jobs:
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#amd-hf-ci"
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi250

View File

@ -40,6 +40,7 @@ env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
@ -570,4 +571,4 @@ jobs:
with:
docker: ${{ inputs.docker }}
start_sha: ${{ github.sha }}
secrets: inherit
secrets: inherit

View File

@ -5,7 +5,7 @@ on:
inputs:
runner_type:
description: 'Type of runner to test (a10 or t4)'
required: true
required: true
docker_image:
description: 'Name of the Docker image'
required: true
@ -15,14 +15,15 @@ on:
env:
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
RUN_PT_TF_CROSS_TESTS: 1
jobs:
get_runner:
@ -77,7 +78,7 @@ jobs:
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: NVIDIA-SMI
run: |
nvidia-smi

View File

@ -343,6 +343,8 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.
More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).

View File

@ -61,6 +61,7 @@ NOT_DEVICE_TESTS = {
"test_load_save_without_tied_weights",
"test_tied_weights_keys",
"test_model_weights_reload_no_missing_tied_weights",
"test_pt_tf_model_equivalence",
"test_mismatched_shapes_have_properly_initialized_weights",
"test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
"test_model_is_small",
@ -84,6 +85,12 @@ warnings.simplefilter(action="ignore", category=FutureWarning)
def pytest_configure(config):
config.addinivalue_line(
"markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested"
)
config.addinivalue_line(
"markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
)
config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")

View File

@ -7,6 +7,6 @@ RUN apt-get install -y cmake
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
RUN uv pip install --no-cache-dir "protobuf==3.20.3"
RUN uv pip install --no-cache-dir "protobuf==3.20.3"
RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -7,5 +7,5 @@ ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words]"
RUN pip uninstall -y transformers
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]"
RUN pip uninstall -y transformers

View File

@ -12,7 +12,7 @@ RUN git lfs install
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"

View File

@ -2,10 +2,10 @@ FROM rocm/dev-ubuntu-22.04:6.2.4
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
ARG PYTORCH='2.6.0'
ARG TORCH_VISION='0.21.0'
ARG TORCH_AUDIO='2.6.0'
ARG ROCM='6.2.4'
ARG PYTORCH='2.5.1'
ARG TORCH_VISION='0.20.0'
ARG TORCH_AUDIO='2.5.0'
ARG ROCM='6.2'
RUN apt update && \
apt install -y --no-install-recommends \
@ -16,11 +16,9 @@ RUN apt update && \
python-is-python3 \
rocrand-dev \
rocthrust-dev \
rocblas-dev \
hipsolver-dev \
hipsparse-dev \
hipblas-dev \
hipblaslt-dev && \
rocblas-dev && \
apt clean && \
rm -rf /var/lib/apt/lists/*

View File

@ -76,9 +76,6 @@ RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git
RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118
RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1
# Add compressed-tensors for quantization testing
RUN python3 -m pip install --no-cache-dir compressed-tensors
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

View File

@ -673,29 +673,6 @@ tpu_use_sudo: false
use_cpu: false
```
</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">
```yml
compute_environment: LOCAL_MACHINE
tp_config:
tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```
</hfoption>
</hfoptions>
يُعد أمر [`accelerate_launch`](https://huggingface.co/docs/accelerate/package_reference/cli#accelerate-launch) هو الطريقة المُوصى بها لتشغيل نص البرمجى للتدريب على نظام موزع باستخدام Accelerate و [`Trainer`] مع المعلمات المحددة في `config_file.yaml`. يتم حفظ هذا الملف في مجلد ذاكرة التخزين المؤقت لـ Accelerate ويتم تحميله تلقائيًا عند تشغيل `accelerate_launch`.

View File

@ -283,6 +283,8 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
Wie bei den langsamen Tests gibt es auch andere Umgebungsvariablen, die standardmäßig beim Testen nicht gesetzt sind:
* `RUN_CUSTOM_TOKENIZERS`: Aktiviert Tests für benutzerdefinierte Tokenizer.
* `RUN_PT_FLAX_CROSS_TESTS`: Aktiviert Tests für die Integration von PyTorch + Flax.
* `RUN_PT_TF_CROSS_TESTS`: Aktiviert Tests für die Integration von TensorFlow + PyTorch.
Weitere Umgebungsvariablen und zusätzliche Informationen finden Sie in der [testing_utils.py](src/transformers/testing_utils.py).

View File

@ -461,8 +461,6 @@
title: Granite
- local: model_doc/granitemoe
title: GraniteMoe
- local: model_doc/granitemoeshared
title: GraniteMoeShared
- local: model_doc/granitevision
title: GraniteVision
- local: model_doc/helium
@ -965,10 +963,6 @@
title: Segment Anything
- local: model_doc/siglip
title: SigLIP
- local: model_doc/siglip2
title: SigLIP2
- local: model_doc/smolvlm
title: SmolVLM
- local: model_doc/speech-encoder-decoder
title: Speech Encoder Decoder Models
- local: model_doc/tapas

View File

@ -173,7 +173,6 @@ Flax), PyTorch, and/or TensorFlow.
| [GPTSAN-japanese](model_doc/gptsan-japanese) | ✅ | ❌ | ❌ |
| [Granite](model_doc/granite) | ✅ | ❌ | ❌ |
| [GraniteMoeMoe](model_doc/granitemoe) | ✅ | ❌ | ❌ |
| [GraniteMoeSharedMoe](model_doc/granitemoeshared) | ✅ | ❌ | ❌ |
| [Graphormer](model_doc/graphormer) | ✅ | ❌ | ❌ |
| [Grounding DINO](model_doc/grounding-dino) | ✅ | ❌ | ❌ |
| [GroupViT](model_doc/groupvit) | ✅ | ✅ | ❌ |
@ -317,8 +316,6 @@ Flax), PyTorch, and/or TensorFlow.
| [SEW](model_doc/sew) | ✅ | ❌ | ❌ |
| [SEW-D](model_doc/sew-d) | ✅ | ❌ | ❌ |
| [SigLIP](model_doc/siglip) | ✅ | ❌ | ❌ |
| [SigLIP2](model_doc/siglip2) | ✅ | ❌ | ❌ |
| [SmolVLM](model_doc/smolvlm) | ✅ | ❌ | ❌ |
| [Speech Encoder decoder](model_doc/speech-encoder-decoder) | ✅ | ❌ | ✅ |
| [Speech2Text](model_doc/speech_to_text) | ✅ | ✅ | ❌ |
| [SpeechT5](model_doc/speecht5) | ✅ | ❌ | ❌ |

View File

@ -55,7 +55,7 @@ To give some examples of how much VRAM it roughly takes to load a model in bfloa
As of writing this document, the largest GPU chip on the market is the A100 & H100 offering 80GB of VRAM. Most of the models listed before require more than 80GB just to be loaded and therefore necessarily require [tensor parallelism](https://huggingface.co/docs/transformers/perf_train_gpu_many#tensor-parallelism) and/or [pipeline parallelism](https://huggingface.co/docs/transformers/perf_train_gpu_many#naive-model-parallelism-vertical-and-pipeline-parallelism).
🤗 Transformers now supports tensor parallelism for supported models having `base_tp_plan` in their respecitve config classes. Learn more about Tensor Parallelism [here](perf_train_gpu_many#tensor-parallelism). Furthermore, if you're interested in writing models in a tensor-parallelism-friendly way, feel free to have a look at [the text-generation-inference library](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models/custom_modeling).
🤗 Transformers does not support tensor parallelism out of the box as it requires the model architecture to be written in a specific way. If you're interested in writing models in a tensor-parallelism-friendly way, feel free to have a look at [the text-generation-inference library](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models/custom_modeling).
Naive pipeline parallelism is supported out of the box. For this, simply load the model with `device="auto"` which will automatically place the different layers on the available GPUs as explained [here](https://huggingface.co/docs/accelerate/v0.22.0/en/concept_guides/big_model_inference).
Note, however that while very effective, this naive pipeline parallelism does not tackle the issues of GPU idling. For this more advanced pipeline parallelism is required as explained [here](https://huggingface.co/docs/transformers/en/perf_train_gpu_many#naive-model-parallelism-vertical-and-pipeline-parallelism).

View File

@ -1,66 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# GraniteMoeShared
## Overview
The GraniteMoe model was proposed in [Power Scheduler: A Batch Size and Token Number Agnostic Learning Rate Scheduler](https://arxiv.org/abs/2408.13359) by Yikang Shen, Matthew Stallone, Mayank Mishra, Gaoyuan Zhang, Shawn Tan, Aditya Prasad, Adriana Meza Soria, David D. Cox and Rameswar Panda.
Additionally this class GraniteMoeSharedModel adds shared experts for Moe.
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model_path = "ibm-research/moe-7b-1b-active-shared-experts"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# drop device_map if running on CPU
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
model.eval()
# change input text as desired
prompt = "Write a code to find the maximum value in a list of numbers."
# tokenize the text
input_tokens = tokenizer(prompt, return_tensors="pt")
# generate output tokens
output = model.generate(**input_tokens, max_new_tokens=100)
# decode output tokens into text
output = tokenizer.batch_decode(output)
# loop over the batch to print, in this example the batch size is 1
for i in output:
print(i)
```
This HF implementation is contributed by [Mayank Mishra](https://huggingface.co/mayank-mishra), [Shawn Tan](https://huggingface.co/shawntan) and [Sukriti Sharma](https://huggingface.co/SukritiSharma).
## GraniteMoeSharedConfig
[[autodoc]] GraniteMoeSharedConfig
## GraniteMoeSharedModel
[[autodoc]] GraniteMoeSharedModel
- forward
## GraniteMoeSharedForCausalLM
[[autodoc]] GraniteMoeSharedForCausalLM
- forward

View File

@ -59,7 +59,7 @@ chat = [
{
"role": "user", "content": [
{"type": "text", "content": "Can this animal"},
{"type": "image", "url": "https://picsum.photos/id/237/200/300"},
{"type": "image", "ur": "https://picsum.photos/id/237/200/300"},
{"type": "text", "content": "live here?"},
{"type": "image", "url": "https://picsum.photos/seed/picsum/200/300"}
]

View File

@ -1,276 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# SigLIP2
## Overview
The SigLIP2 model was proposed in [SigLIP 2: Multilingual Vision-Language Encoders with Improved Semantic Understanding, Localization, and Dense Features](https://huggingface.co/papers/2502.14786) by Michael Tschannen, Alexey Gritsenko, Xiao Wang, Muhammad Ferjad Naeem, Ibrahim Alabdulmohsin,
Nikhil Parthasarathy, Talfan Evans, Lucas Beyer, Ye Xia, Basil Mustafa, Olivier Hénaff, Jeremiah Harmsen,
Andreas Steiner and Xiaohua Zhai.
The model comes in two variants
1) FixRes - model works with fixed resolution images (backward compatible with SigLIP v1)
2) NaFlex - model works with variable image aspect ratios and resolutions (SigLIP2 in `transformers`)
The abstract from the paper is the following:
*We introduce SigLIP 2, a family of new multilingual vision-language encoders that build on the success
of the original SigLIP. In this second iteration, we extend the original image-text training objective with
several prior, independently developed techniques into a unified recipe—this includes decoder-based
pretraining, self-supervised losses (self-distillation, masked prediction) and online data curation. With
these changes, SigLIP 2 models outperform their SigLIP counterparts at all model scales in core capabilities,
including zero-shot classification (best SigLIP 2 ViT-g/16 achieves 85.0% ImageNet zero-shot
accuracy), image-text retrieval, and transfer performance when extracting visual representations for
Vision-Language Models (VLMs). Furthermore, the new training recipe leads to significant improvements
on localization and dense prediction tasks. We also train variants which support multiple resolutions
and preserve the inputs native aspect ratio. Finally, we train on a more diverse data-mixture that
includes de-biasing techniques, leading to much better multilingual understanding and improved fair-
ness. To provide users with the ability to trade-off inference cost with performance, we release model
checkpoints at four sizes (ViT-B/86M, L/303M, So400m/400M, and g/1B).*
## Usage tips
- Usage of SigLIP2 is similar to [SigLIP](siglip) and [CLIP](clip). The main difference from CLIP is the training loss, which does not require a global view of all the pairwise similarities of images and texts within a batch. One needs to apply the sigmoid activation function to the logits, rather than the softmax.
- Training is supported but does not use `torch.distributed` utilities which may limit the scalability of batch size. However, DDP and FDSP works on single-node multi-gpu setup.
- When using the standalone [`GemmaTokenizerFast`] make sure to pass `padding="max_length"` and `max_length=64` as that's how the model was trained.
- Model was trained with *lowercased* text, make sure you make the same preprocessing for your text labels.
- To get the same results as the pipeline, a prompt template of "this is a photo of {label}" should be used.
- The NaFlex variant supports processing images at higher resolutions by adjusting the `max_num_patches` parameter in the `Processor`. The default value is `max_num_patches=256`. Increasing `max_num_patches` to 1024 (4x) will approximately double processed image height and width, while preserving the aspect ratio.
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/siglip2_metrics_table.png"
alt="drawing" width="600"/>
This model was contributed by [qubvel](https://huggingface.co/qubvel-hf).
The original code can be found [here](https://github.com/google-research/big_vision/tree/main).
## Usage example
There are 2 main ways to use SigLIP2: either using the pipeline API, which abstracts away all the complexity for you, or by using the `Siglip2Model` class yourself.
### FixRes variant
**Pipeline API**
The pipeline allows to use the model in a few lines of code:
```python
>>> from transformers import pipeline
>>> from PIL import Image
>>> import requests
>>> # load pipe
>>> image_classifier = pipeline(
... task="zero-shot-image-classification",
... model="google/siglip2-base-patch16-224",
... )
>>> # load image
>>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> # inference
>>> candidate_labels = ["2 cats", "a plane", "a remote"]
>>> outputs = image_classifier(image, candidate_labels=candidate_labels)
>>> outputs = [{"score": round(output["score"], 4), "label": output["label"] } for output in outputs]
>>> print(outputs)
[{'score': 0.1499, 'label': '2 cats'}, {'score': 0.0008, 'label': 'a remote'}, {'score': 0.0, 'label': 'a plane'}]
```
**Using the model yourself**
If you want to do the pre- and postprocessing yourself, here's how to do that:
```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, AutoModel
>>> import torch
>>> model = AutoModel.from_pretrained("google/siglip2-base-patch16-224")
>>> processor = AutoProcessor.from_pretrained("google/siglip2-base-patch16-224")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> candidate_labels = ["2 cats", "2 dogs"]
# follows the pipeline prompt template to get same results
>>> texts = [f"This is a photo of {label}." for label in candidate_labels]
# IMPORTANT: we pass `padding=max_length` and `max_length=64` since the model was trained with this
>>> inputs = processor(text=texts, images=image, padding="max_length", max_length=64, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
>>> logits_per_image = outputs.logits_per_image
>>> probs = torch.sigmoid(logits_per_image) # these are the probabilities
>>> print(f"{probs[0][0]:.1%} that image 0 is '{candidate_labels[0]}'")
15.0% that image 0 is '2 cats'
```
### NaFlex variant
NaFlex combines ideas from FlexiViT, i.e. supporting multiple, predefined sequence lengths
with a single ViT model, and NaViT, namely processing images at their native aspect ratio.
This enables processing different types of images at appropriate resolution, e.g. using a
larger resolution to process document images, while at the same time minimizing the impact
of aspect ratio distortion on certain inference tasks, e.g. on OCR.
Given a patch size and target sequence length, NaFlex preprocesses the data by first resizing
the input image such that the height and width after resizing are multiples of the patch size,
while
1. keeping the aspect ratio distortion as small as possible
2. producing a sequence length of at most the desired target sequence length (`max_num_patches`)
The resulting distortion in width and height is at most `(patch_size - 1) / width` and
`(patch_size - 1) / height`, respectively, which tends to be small for common resolutions and aspect ratios.
After resizing, the image is split into a sequence of patches, and a mask with padding information is added.
```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, AutoModel
>>> import torch
>>> model = AutoModel.from_pretrained("google/siglip2-base-patch16-naflex")
>>> processor = AutoProcessor.from_pretrained("google/siglip2-base-patch16-naflex")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> candidate_labels = ["2 cats", "2 dogs"]
# follows the pipeline prompt template to get same results
>>> texts = [f"This is a photo of {label}." for label in candidate_labels]
# default value for `max_num_patches` is 256, but you can increase resulted image resolution providing
# higher values e.g. `max_num_patches=512`
>>> inputs = processor(text=texts, images=image, max_num_patches=256, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
>>> logits_per_image = outputs.logits_per_image
>>> probs = torch.sigmoid(logits_per_image) # these are the probabilities
>>> print(f"{probs[0][0]:.1%} that image 0 is '{candidate_labels[0]}'")
21.1% that image 0 is '2 cats'
```
## Resources
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with SigLIP2.
- [Zero-shot image classification task guide](../tasks/zero_shot_image_classification)
- Demo notebook for SigLIP2 can be found [here](https://github.com/qubvel/transformers-notebooks/tree/master/notebooks/SigLIP2_inference.ipynb). 🌎
If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
## Combining SigLIP2 and Flash Attention 2
First, make sure to install the latest version of Flash Attention 2.
```bash
pip install -U flash-attn --no-build-isolation
```
Make also sure that you have a hardware that is compatible with Flash-Attention 2. Read more about it in the official documentation of flash-attn repository. Make also sure to load your model in half-precision (e.g. `torch.float16``)
To load and run a model using Flash Attention 2, refer to the snippet below:
```python
>>> import torch
>>> import requests
>>> from PIL import Image
>>> from transformers import AutoProcessor, AutoModel
>>> device = "cuda" # the device to load the model onto
>>> model = AutoModel.from_pretrained(
... "google/siglip2-so400m-patch14-384",
... attn_implementation="flash_attention_2",
... torch_dtype=torch.float16,
... device_map=device,
... )
>>> processor = AutoProcessor.from_pretrained("google/siglip2-so400m-patch14-384")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> candidate_labels = ["2 cats", "2 dogs"]
# follows the pipeline prompt template to get same results
>>> texts = [f'This is a photo of {label}.' for label in candidate_labels]
# important: we pass `padding=max_length` since the model was trained with this
>>> inputs = processor(text=texts, images=image, padding="max_length", return_tensors="pt").to(device)
>>> with torch.no_grad():
... with torch.autocast(device):
... outputs = model(**inputs)
>>> logits_per_image = outputs.logits_per_image
>>> probs = torch.sigmoid(logits_per_image) # these are the probabilities
>>> print(f"{probs[0][0]:.1%} that image 0 is '{candidate_labels[0]}'")
19.8% that image 0 is '2 cats'
```
## Siglip2Config
[[autodoc]] Siglip2Config
## Siglip2TextConfig
[[autodoc]] Siglip2TextConfig
## Siglip2VisionConfig
[[autodoc]] Siglip2VisionConfig
## Siglip2ImageProcessor
[[autodoc]] Siglip2ImageProcessor
- preprocess
## Siglip2ImageProcessorFast
[[autodoc]] Siglip2ImageProcessorFast
- preprocess
## Siglip2Processor
[[autodoc]] Siglip2Processor
## Siglip2Model
[[autodoc]] Siglip2Model
- forward
- get_text_features
- get_image_features
## Siglip2TextModel
[[autodoc]] Siglip2TextModel
- forward
## Siglip2VisionModel
[[autodoc]] Siglip2VisionModel
- forward
## Siglip2ForImageClassification
[[autodoc]] Siglip2ForImageClassification
- forward

View File

@ -1,197 +0,0 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# SmolVLM
## Overview
SmolVLM2 is an adaptation of the Idefics3 model with two main differences:
- It uses SmolLM2 for the text model.
- It supports multi-image and video inputs
## Usage tips
Input images are processed either by upsampling (if resizing is enabled) or at their original resolution. The resizing behavior depends on two parameters: do_resize and size.
Videos should not be upsampled.
If `do_resize` is set to `True`, the model resizes images so that the longest edge is 4*512 pixels by default.
The default resizing behavior can be customized by passing a dictionary to the `size` parameter. For example, `{"longest_edge": 4 * 512}` is the default, but you can change it to a different value if needed.
Heres how to control resizing and set a custom size:
```python
image_processor = SmolVLMImageProcessor(do_resize=True, size={"longest_edge": 2 * 512}, max_image_size=512)
```
Additionally, the `max_image_size` parameter, which controls the size of each square patch the image is decomposed into, is set to 512 by default but can be adjusted as needed. After resizing (if applicable), the image processor decomposes the images into square patches based on the `max_image_size` parameter.
This model was contributed by [orrzohar](https://huggingface.co/orrzohar).
## Usage example
### Single Media inference
The model can accept both images and videos as input, but you should use only one of the modalities at a time. Here's an example code for that.
```python
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-256M-Video-Instruct")
model = AutoModelForImageTextToText.from_pretrained(
"HuggingFaceTB/SmolVLM2-256M-Video-Instruct",
torch_dtype=torch.bfloat16,
device_map="cuda"
)
conversation = [
{
"role": "user",
"content":[
{"type": "image", "url": "http://images.cocodataset.org/val2017/000000039769.jpg"},
{"type": "text", "text": "Describe this image."}
]
}
]
inputs = processor.apply_chat_template(
conversation,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
).to(model.device, dtype=torch.bfloat16)
output_ids = model.generate(**inputs, max_new_tokens=128)
generated_texts = processor.batch_decode(output_ids, skip_special_tokens=True)
print(generated_texts)
# Video
conversation = [
{
"role": "user",
"content": [
{"type": "video", "path": "/path/to/video.mp4"},
{"type": "text", "text": "Describe this video in detail"}
]
},
]
inputs = processor.apply_chat_template(
conversation,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
).to(model.device, dtype=torch.bfloat16)
generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=100)
generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
print(generated_texts[0])
```
### Batch Mixed Media Inference
The model can batch inputs composed of several images/videos and text. Here is an example.
```python
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-256M-Video-Instruct")
model = AutoModelForImageTextToText.from_pretrained(
"HuggingFaceTB/SmolVLM2-256M-Video-Instruct",
torch_dtype=torch.bfloat16,
device_map="cuda"
)
# Conversation for the first image
conversation1 = [
{
"role": "user",
"content": [
{"type": "image", "path": "/path/to/image.jpg"},
{"type": "text", "text": "Describe this image."}
]
}
]
# Conversation with two images
conversation2 = [
{
"role": "user",
"content": [
{"type": "image", "path": "/path/to/image.jpg"},
{"type": "image", "path": "/path/to/image.jpg"},
{"type": "text", "text": "What is written in the pictures?"}
]
}
]
# Conversation with pure text
conversation3 = [
{"role": "user","content": "who are you?"}
]
conversations = [conversation1, conversation2, conversation3]
inputs = processor.apply_chat_template(
conversation,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
).to(model.device, dtype=torch.bfloat16)
generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=100)
generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
print(generated_texts[0])
```
## SmolVLMConfig
[[autodoc]] SmolVLMConfig
## SmolVLMVisionConfig
[[autodoc]] SmolVLMVisionConfig
## Idefics3VisionTransformer
[[autodoc]] SmolVLMVisionTransformer
## SmolVLMModel
[[autodoc]] SmolVLMModel
- forward
## SmolVLMForConditionalGeneration
[[autodoc]] SmolVLMForConditionalGeneration
- forward
## SmolVLMImageProcessor
[[autodoc]] SmolVLMImageProcessor
- preprocess
## SmolVLMProcessor
[[autodoc]] SmolVLMProcessor
- __call__

View File

@ -60,7 +60,6 @@ FlashAttention-2 is currently supported for the following architectures:
* [GPT-J](https://huggingface.co/docs/transformers/model_doc/gptj#transformers.GPTJModel)
* [Granite](https://huggingface.co/docs/transformers/model_doc/granite#transformers.GraniteModel)
* [GraniteMoe](https://huggingface.co/docs/transformers/model_doc/granitemoe#transformers.GraniteMoeModel)
* [GraniteMoeShared](https://huggingface.co/docs/transformers/model_doc/granitemoeshared#transformers.GraniteMoeSharedModel)
* [Idefics2](https://huggingface.co/docs/transformers/model_doc/idefics2#transformers.Idefics2Model)
* [Idefics3](https://huggingface.co/docs/transformers/model_doc/idefics3#transformers.Idefics3Model)
* [Falcon](https://huggingface.co/docs/transformers/model_doc/falcon#transformers.FalconModel)
@ -95,7 +94,6 @@ FlashAttention-2 is currently supported for the following architectures:
* [PhiMoE](https://huggingface.co/docs/transformers/model_doc/phimoe#transformers.PhimoeModel)
* [StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm#transformers.StableLmModel)
* [Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2#transformers.Starcoder2Model)
* [SmolVLM](https://huggingface.co/docs/transformers/model_doc/smolvlm#transformers.SmolVLMModel)
* [Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2#transformers.Qwen2Model)
* [Qwen2Audio](https://huggingface.co/docs/transformers/model_doc/qwen2_audio#transformers.Qwen2AudioEncoder)
* [Qwen2MoE](https://huggingface.co/docs/transformers/model_doc/qwen2_moe#transformers.Qwen2MoeModel)
@ -111,7 +109,6 @@ FlashAttention-2 is currently supported for the following architectures:
* [data2vec_audio](https://huggingface.co/docs/transformers/main/en/model_doc/data2vec#transformers.Data2VecAudioModel)
* [Sew](https://huggingface.co/docs/transformers/main/en/model_doc/sew#transformers.SEWModel)
* [SigLIP](https://huggingface.co/docs/transformers/model_doc/siglip)
* [SigLIP2](https://huggingface.co/docs/transformers/model_doc/siglip2)
* [UniSpeech](https://huggingface.co/docs/transformers/v4.39.3/en/model_doc/unispeech#transformers.UniSpeechModel)
* [unispeech_sat](https://huggingface.co/docs/transformers/v4.39.3/en/model_doc/unispeech-sat#transformers.UniSpeechSatModel)
* [helium](https://huggingface.co/docs/transformers/main/en/model_doc/heliumtransformers.HeliumModel)
@ -269,7 +266,6 @@ For now, Transformers supports SDPA inference and training for the following arc
* [Idefics3](https://huggingface.co/docs/transformers/model_doc/idefics3#transformers.Idefics3Model)
* [I-JEPA](https://huggingface.co/docs/transformers/model_doc/ijepa#transformers.IJepaModel)
* [GraniteMoe](https://huggingface.co/docs/transformers/model_doc/granitemoe#transformers.GraniteMoeModel)
* [GraniteMoeShared](https://huggingface.co/docs/transformers/model_doc/granitemoeshared#transformers.GraniteMoeSharedModel)
* [JetMoe](https://huggingface.co/docs/transformers/model_doc/jetmoe#transformers.JetMoeModel)
* [Jamba](https://huggingface.co/docs/transformers/model_doc/jamba#transformers.JambaModel)
* [Llama](https://huggingface.co/docs/transformers/model_doc/llama#transformers.LlamaModel)
@ -303,7 +299,6 @@ For now, Transformers supports SDPA inference and training for the following arc
* [Mixtral](https://huggingface.co/docs/transformers/model_doc/mixtral#transformers.MixtralModel)
* [StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm#transformers.StableLmModel)
* [Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2#transformers.Starcoder2Model)
* [SmolVLM](https://huggingface.co/docs/transformers/model_doc/smolvlm#transformers.SmolVLMModel)
* [Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2#transformers.Qwen2Model)
* [Qwen2Audio](https://huggingface.co/docs/transformers/model_doc/qwen2_audio#transformers.Qwen2AudioEncoder)
* [Qwen2MoE](https://huggingface.co/docs/transformers/model_doc/qwen2_moe#transformers.Qwen2MoeModel)
@ -311,7 +306,6 @@ For now, Transformers supports SDPA inference and training for the following arc
* [RoBERTa](https://huggingface.co/docs/transformers/model_doc/roberta#transformers.RobertaModel)
* [Sew](https://huggingface.co/docs/transformers/main/en/model_doc/sew#transformers.SEWModel)
* [SigLIP](https://huggingface.co/docs/transformers/model_doc/siglip)
* [SigLIP2](https://huggingface.co/docs/transformers/model_doc/siglip2)
* [StableLm](https://huggingface.co/docs/transformers/model_doc/stablelm#transformers.StableLmModel)
* [Starcoder2](https://huggingface.co/docs/transformers/model_doc/starcoder2#transformers.Starcoder2Model)
* [UniSpeech](https://huggingface.co/docs/transformers/v4.39.3/en/model_doc/unispeech#transformers.UniSpeechModel)

View File

@ -450,13 +450,12 @@ Implementations:
- [parallelformers](https://github.com/tunib-ai/parallelformers) (only inference at the moment)
- [SageMaker](https://arxiv.org/abs/2111.05972) - this is a proprietary solution that can only be used on AWS.
- [OSLO](https://github.com/tunib-ai/oslo) has the tensor parallelism implementation based on the Transformers.
- [`transformers` integration](main_classes/trainer) tensor parallelism is available through tp_size attribute for models having `base_tp_plan`. Further you can look at [example usage](perf_infer_gpu_multi)
SageMaker combines TP with DP for a more efficient processing.
🤗 Transformers status:
- core: uses PyTorch 2 APIs to support tensor parallelism to models having base_tp_plan in their respective config classes.
- Alternatively, you can as well try [parallelformers](https://github.com/tunib-ai/parallelformers) that provides this support for most of our models. Training mode with TP is as well supported natively in transformers.
- core: not yet implemented in the core
- but if you want inference [parallelformers](https://github.com/tunib-ai/parallelformers) provides this support for most of our models. So until this is implemented in the core you can use theirs. And hopefully training mode will be supported too.
- Deepspeed-Inference also supports our BERT, GPT-2, and GPT-Neo models in their super-fast CUDA-kernel-based inference mode, see more [here](https://www.deepspeed.ai/tutorials/inference-tutorial/)
🤗 Accelerate integrates with [TP from Megatron-LM](https://huggingface.co/docs/accelerate/v0.23.0/en/usage_guides/megatron_lm).
@ -536,7 +535,7 @@ Important papers:
- [Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model](
https://arxiv.org/abs/2201.11990)
🤗 Transformers status: not yet implemented, since we have no PP.
🤗 Transformers status: not yet implemented, since we have no PP and TP.
## FlexFlow

View File

@ -59,7 +59,7 @@ Use the table below to help you decide which quantization method to use.
| [HQQ](./hqq.md) | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 1/8 | 🟢 | 🔴 | 🟢 | https://github.com/mobiusml/hqq/ |
| [optimum-quanto](./quanto.md) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 | 🟢 | 2/4/8 | 🔴 | 🔴 | 🟢 | https://github.com/huggingface/optimum-quanto |
| [FBGEMM_FP8](./fbgemm_fp8.md) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🔴 | 8 | 🔴 | 🟢 | 🟢 | https://github.com/pytorch/FBGEMM |
| [torchao](./torchao.md) | 🟢 | 🟢 | 🟢 | 🔴 | 🟡 <sub>5</sub> | 🔴 | | 4/8 | | 🟢🔴 | 🟢 | https://github.com/pytorch/ao |
| [torchao](./torchao.md) | 🟢 | | 🟢 | 🔴 | 🟡 <sub>5</sub> | 🔴 | | 4/8 | | 🟢🔴 | 🟢 | https://github.com/pytorch/ao |
| [VPTQ](./vptq.md) | 🔴 | 🔴 | 🟢 | 🟡 | 🔴 | 🔴 | 🟢 | 1/8 | 🔴 | 🟢 | 🟢 | https://github.com/microsoft/VPTQ |
| [SpQR](./spqr.md) | 🔴 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 3 | 🔴 | 🟢 | 🟢 | https://github.com/Vahe1994/SpQR/ |
| [FINEGRAINED_FP8](./finegrained_fp8.md) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🔴 | 8 | 🔴 | 🟢 | 🟢 | |

View File

@ -22,14 +22,6 @@ pip install --upgrade torch torchao transformers
By default, the weights are loaded in full precision (torch.float32) regardless of the actual data type the weights are stored in such as torch.float16. Set `torch_dtype="auto"` to load the weights in the data type defined in a model's `config.json` file to automatically load the most memory-optimal data type.
## Manually Choose Quantization Types and Settings
`torchao` Provides many commonly used types of quantization, including different dtypes like int4, float8 and different flavors like weight only, dynamic quantization etc., only `int4_weight_only`, `int8_weight_only` and `int8_dynamic_activation_int8_weight` are integrated into hugigngface transformers currently, but we can add more when needed.
If you want to run the following codes on CPU even with GPU available, just change `device_map="cpu"` and `quantization_config = TorchAoConfig("int4_weight_only", group_size=128, layout=Int4CPULayout())` where `layout` comes from `from torchao.dtypes import Int4CPULayout` which is only available from torchao 0.8.0 and higher.
Users can manually specify the quantization types and settings they want to use:
```py
import torch
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
@ -40,67 +32,31 @@ model_name = "meta-llama/Meta-Llama-3-8B"
quantization_config = TorchAoConfig("int4_weight_only", group_size=128)
quantized_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto", quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
input_text = "What are we having for dinner?"
input_ids = tokenizer(input_text, return_tensors="pt").to(quantized_model.device)
# auto-compile the quantized model with `cache_implementation="static"` to get speedup
output = quantized_model.generate(**input_ids, max_new_tokens=10, cache_implementation="static")
print(tokenizer.decode(output[0], skip_special_tokens=True))
# benchmark the performance
from torch._inductor.utils import do_bench_using_profiling
from typing import Callable
def benchmark_fn(func: Callable, *args, **kwargs) -> float:
"""Thin wrapper around do_bench_using_profiling"""
no_args = lambda: func(*args, **kwargs)
time = do_bench_using_profiling(no_args)
return time * 1e3
MAX_NEW_TOKENS = 1000
print("int4wo-128 model:", benchmark_fn(quantized_model.generate, **input_ids, max_new_tokens=MAX_NEW_TOKENS, cache_implementation="static"))
bf16_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
output = bf16_model.generate(**input_ids, max_new_tokens=10, cache_implementation="static") # auto-compile
print("bf16 model:", benchmark_fn(bf16_model.generate, **input_ids, max_new_tokens=MAX_NEW_TOKENS, cache_implementation="static"))
```
## Automatically Select Quantization Types
`torchao` also provies `autoquant` feature that automatically chooses a quantization type for quantizable layers such as linear based on microbenchmarks of quantizing and compiling a single linear layer.
```py
import torch
from transformers import TorchAoConfig, AutoModelForCausalLM, AutoTokenizer
model_name = "meta-llama/Meta-Llama-3-8B"
quantization_config = TorchAoConfig("autoquant", min_sqnr=None)
quantized_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto", quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)
input_text = "What are we having for dinner?"
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
# auto-compile the quantized model with `cache_implementation="static"` to get speedup
output = quantized_model.generate(**input_ids, max_new_tokens=10, cache_implementation="static")
# Due to some implementation details we are explicitly calling this now, we may refactor our code and remove this in the future
quantized_model.finalize_autoquant()
print(tokenizer.decode(output[0], skip_special_tokens=True))
# benchmark the performance
from torch._inductor.utils import do_bench_using_profiling
from typing import Callable
import torch.utils.benchmark as benchmark
def benchmark_fn(func: Callable, *args, **kwargs) -> float:
"""Thin wrapper around do_bench_using_profiling"""
no_args = lambda: func(*args, **kwargs)
time = do_bench_using_profiling(no_args)
return time * 1e3
def benchmark_fn(f, *args, **kwargs):
# Manual warmup
for _ in range(5):
f(*args, **kwargs)
t0 = benchmark.Timer(
stmt="f(*args, **kwargs)",
globals={"args": args, "kwargs": kwargs, "f": f},
num_threads=torch.get_num_threads(),
)
return f"{(t0.blocked_autorange().mean):.3f}"
MAX_NEW_TOKENS = 1000
print("autoquantized model:", benchmark_fn(quantized_model.generate, **input_ids, max_new_tokens=MAX_NEW_TOKENS, cache_implementation="static"))
print("int4wo-128 model:", benchmark_fn(quantized_model.generate, **input_ids, max_new_tokens=MAX_NEW_TOKENS, cache_implementation="static"))
bf16_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda", torch_dtype=torch.bfloat16)
output = bf16_model.generate(**input_ids, max_new_tokens=10, cache_implementation="static") # auto-compile
@ -124,7 +80,7 @@ quantized_model.save_pretrained(output_dir, safe_serialization=False)
# load quantized model
ckpt_id = "llama3-8b-int4wo-128" # or huggingface hub model id
loaded_quantized_model = AutoModelForCausalLM.from_pretrained(ckpt_id, device_map="auto")
loaded_quantized_model = AutoModelForCausalLM.from_pretrained(ckpt_id, device_map="cuda")
# confirm the speedup

View File

@ -799,29 +799,6 @@ tpu_use_sudo: false
use_cpu: false
```
</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">
```yml
compute_environment: LOCAL_MACHINE
tp_config:
tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```
</hfoption>
</hfoptions>

View File

@ -361,30 +361,6 @@ use_cpu: false
```
</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">
```yml
compute_environment: LOCAL_MACHINE
tp_config:
tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```
</hfoption>
</hfoptions>

View File

@ -85,7 +85,7 @@ python src/transformers/commands/transformers_cli.py env
3. 해당 기능의 사용법을 보여주는 *코드 스니펫*을 제공해 주세요.
4. 기능과 관련된 논문이 있는 경우 링크를 포함해 주세요.
이슈가 잘 작성되었다면 이슈가 생성된 순간, 이미 80% 정도의 작업이 완료된 것입니다.
이슈가 잘 작성되었다면 이슈가 생성된 순간, 이미 80% 정도의 작업이 완료된 것입니다.
이슈를 제기하는 데 도움이 될 만한 [템플릿](https://github.com/huggingface/transformers/tree/main/templates)도 준비되어 있습니다.
@ -140,7 +140,7 @@ python src/transformers/commands/transformers_cli.py env
```
만약 이미 가상 환경에 🤗 Transformers가 설치되어 있다면, `-e` 플래그를 사용하여 설치하기 전에 `pip uninstall transformers`로 제거해주세요.
여러분의 운영체제에 따라서, 그리고 🤗 Transformers의 선택적 의존성의 수가 증가하면서, 이 명령이 실패할 수도 있습니다. 그럴 경우 사용하려는 딥러닝 프레임워크(PyTorch, TensorFlow, 그리고/또는 Flax)를 설치한 후 아래 명령을 실행해주세요:
```bash
@ -188,7 +188,7 @@ python src/transformers/commands/transformers_cli.py env
이러한 검사에 대해 자세히 알아보고 관련 문제를 해결하는 방법은 [Pull Request에 대한 검사](https://huggingface.co/docs/transformers/pr_checks) 가이드를 확인하세요.
만약 `docs/source` 디렉터리 아래의 문서를 수정하는 경우, 문서가 빌드될 수 있는지 확인하세요. 이 검사는 Pull Request를 열 때도 CI에서 실행됩니다. 로컬 검사를 실행하려면 문서 빌더를 설치해야 합니다:
```bash
pip install ".[docs]"
```
@ -216,7 +216,7 @@ python src/transformers/commands/transformers_cli.py env
git fetch upstream
git rebase upstream/main
```
변경 사항을 브랜치에 푸시하세요:
```bash
@ -238,7 +238,7 @@ python src/transformers/commands/transformers_cli.py env
☐ 새로운 기능을 추가하는 경우, 해당 기능에 대한 테스트도 추가하세요.<br>
- 새 모델을 추가하는 경우, `ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)`을 사용하여 일반적인 테스트를 활성화하세요.
- 새 `@slow` 테스트를 추가하는 경우, 다음 명령으로 테스트를 통과하는지 확인하세요: `RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`.
- 새 토크나이저를 추가하는 경우, 테스트를 작성하고 다음 명령으로 테스트를 통과하는지 확인하세요: `RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py`.
- 새 토크나이저를 추가하는 경우, 테스트를 작성하고 다음 명령으로 테스트를 통과하는지 확인하세요: `RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py`.
- CircleCI에서는 느린 테스트를 실행하지 않지만, GitHub Actions에서는 매일 밤 실행됩니다!<br>
☐ 모든 공개 메소드는 유용한 기술문서를 가져야 합니다 (예를 들어 [`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py) 참조).<br>
@ -282,6 +282,8 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
느린 테스트와 마찬가지로, 다음과 같이 테스트 중에 기본적으로 활성화되지 않는 다른 환경 변수도 있습니다:
- `RUN_CUSTOM_TOKENIZERS`: 사용자 정의 토크나이저 테스트를 활성화합니다.
- `RUN_PT_FLAX_CROSS_TESTS`: PyTorch + Flax 통합 테스트를 활성화합니다.
- `RUN_PT_TF_CROSS_TESTS`: TensorFlow + PyTorch 통합 테스트를 활성화합니다.
더 많은 환경 변수와 추가 정보는 [testing_utils.py](src/transformers/testing_utils.py)에서 찾을 수 있습니다.

View File

@ -548,29 +548,6 @@ tpu_use_sudo: false
use_cpu: false
```
</hfoption>
<hfoption id="Tensor Parallelism with PyTorch 2">
```yml
compute_environment: LOCAL_MACHINE
tp_config:
tp_size: 4
distributed_type: TP
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
```
</hfoption>
</hfoptions>

View File

@ -33,7 +33,7 @@ limitations under the License.
* 实现新的模型。
* 为示例或文档做贡献。
如果你不知道从哪里开始,有一个特别的 [Good First Issue](https://github.com/huggingface/transformers/contribute) 列表。它会列出一些适合初学者的开放的 issues并帮助你开始为开源项目做贡献。只需要在你想要处理的 issue 下发表评论就行。
如果你不知道从哪里开始,有一个特别的 [Good First Issue](https://github.com/huggingface/transformers/contribute) 列表。它会列出一些适合初学者的开放的 issues并帮助你开始为开源项目做贡献。只需要在你想要处理的 issue 下发表评论就行。
如果想要稍微更有挑战性的内容,你也可以查看 [Good Second Issue](https://github.com/huggingface/transformers/labels/Good%20Second%20Issue) 列表。总的来说,如果你觉得自己知道该怎么做,就去做吧,我们会帮助你达到目标的!🚀
@ -139,7 +139,7 @@ python src/transformers/commands/transformers_cli.py env
```
如果在虚拟环境中已经安装了 🤗 Transformers请先使用 `pip uninstall transformers` 卸载它,然后再用 `-e` 参数以可编辑模式重新安装。
根据你的操作系统,以及 Transformers 的可选依赖项数量的增加可能会在执行此命令时出现失败。如果出现这种情况请确保已经安装了你想使用的深度学习框架PyTorch, TensorFlow 和 Flax然后执行以下操作
```bash
@ -187,7 +187,7 @@ python src/transformers/commands/transformers_cli.py env
想要了解有关这些检查及如何解决相关问题的更多信息,请阅读 [检查 Pull Request](https://huggingface.co/docs/transformers/pr_checks) 指南。
如果你修改了 `docs/source` 目录下的文档,请确保文档仍然能够被构建。这个检查也会在你创建 PR 时在 CI 中运行。如果要进行本地检查,请确保安装了文档构建工具:
```bash
pip install ".[docs]"
```
@ -281,6 +281,8 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
和时间较长的测试一样,还有其他环境变量在测试过程中,在默认情况下是未启用的:
- `RUN_CUSTOM_TOKENIZERS`: 启用自定义分词器的测试。
- `RUN_PT_FLAX_CROSS_TESTS`: 启用 PyTorch + Flax 整合的测试。
- `RUN_PT_TF_CROSS_TESTS`: 启用 TensorFlow + PyTorch 整合的测试。
更多环境变量和额外信息可以在 [testing_utils.py](src/transformers/testing_utils.py) 中找到。

View File

@ -61,7 +61,7 @@ from transformers.utils import check_min_version, send_example_telemetry
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
Array = Any
Dataset = datasets.arrow_dataset.Dataset

View File

@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/flax/speech-recognition/requirements.txt")

View File

@ -56,7 +56,7 @@ from transformers.utils import check_min_version, send_example_telemetry
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
Array = Any
Dataset = datasets.arrow_dataset.Dataset

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

View File

@ -140,11 +140,6 @@ class MyNewModelConfig(PretrainedConfig):
"layers.*.mlp.up_proj": "colwise",
"layers.*.mlp.down_proj": "rowwise",
}
base_model_pp_plan = {
"embed_tokens": (["input_ids"], ["inputs_embeds"]),
"layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
"norm": (["hidden_states"], ["hidden_states"]),
}
def __init__(
self,

View File

@ -43,11 +43,6 @@ class MyNewModel2Config(PretrainedConfig):
"layers.*.mlp.up_proj": "colwise",
"layers.*.mlp.down_proj": "rowwise",
}
base_model_pp_plan = {
"embed_tokens": (["input_ids"], ["inputs_embeds"]),
"layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
"norm": (["hidden_states"], ["hidden_states"]),
}
def __init__(
self,

View File

@ -79,20 +79,6 @@ class NewModelConfig(PretrainedConfig):
model_type = "new_model"
keys_to_ignore_at_inference = ["past_key_values"]
base_model_tp_plan = {
"layers.*.self_attn.q_proj": "colwise",
"layers.*.self_attn.k_proj": "colwise",
"layers.*.self_attn.v_proj": "colwise",
"layers.*.self_attn.o_proj": "rowwise",
"layers.*.mlp.gate_proj": "colwise",
"layers.*.mlp.up_proj": "colwise",
"layers.*.mlp.down_proj": "rowwise",
}
base_model_pp_plan = {
"embed_tokens": (["input_ids"], ["inputs_embeds"]),
"layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
"norm": (["hidden_states"], ["hidden_states"]),
}
def __init__(
self,

View File

@ -19,7 +19,7 @@ from ...image_utils import (
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_flat_list_of_images,
make_list_of_images,
to_numpy_array,
valid_images,
validate_preprocess_arguments,
@ -221,7 +221,8 @@ class ImgprocModelImageProcessor(BaseImageProcessor):
size = size if size is not None else self.size
size = get_size_dict(size, default_to_square=False)
images = make_flat_list_of_images(images)
images = make_list_of_images(images)
if not valid_images(images):
raise ValueError(

View File

@ -356,7 +356,6 @@ class DummyPreTrainedModel(PreTrainedModel):
_supports_cache_class = True
_supports_quantized_cache = True
_supports_static_cache = True
_supports_attention_backend = True
def _init_weights(self, module):
std = self.config.initializer_range
@ -699,9 +698,7 @@ class DummyModel(DummyPreTrainedModel):
if attention_mask is not None:
causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
mask_length = attention_mask.shape[-1]
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
causal_mask.device
)
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
padding_mask = padding_mask == 0
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
padding_mask, min_dtype

View File

@ -356,7 +356,6 @@ class Multimodal1TextPreTrainedModel(PreTrainedModel):
_supports_cache_class = True
_supports_quantized_cache = True
_supports_static_cache = True
_supports_attention_backend = True
def _init_weights(self, module):
std = self.config.initializer_range
@ -699,9 +698,7 @@ class Multimodal1TextModel(Multimodal1TextPreTrainedModel):
if attention_mask is not None:
causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
mask_length = attention_mask.shape[-1]
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
causal_mask.device
)
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
padding_mask = padding_mask == 0
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
padding_mask, min_dtype

View File

@ -356,7 +356,6 @@ class MyNewModel2PreTrainedModel(PreTrainedModel):
_supports_cache_class = True
_supports_quantized_cache = True
_supports_static_cache = True
_supports_attention_backend = True
def _init_weights(self, module):
std = self.config.initializer_range
@ -492,7 +491,6 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs, # NOOP kwarg for now
) -> Union[Tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
@ -705,9 +703,7 @@ class MyNewModel2Model(MyNewModel2PreTrainedModel):
if attention_mask is not None:
causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
mask_length = attention_mask.shape[-1]
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
causal_mask.device
)
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
padding_mask = padding_mask == 0
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
padding_mask, min_dtype
@ -791,20 +787,17 @@ class MyNewModel2ForSequenceClassification(MyNewModel2PreTrainedModel):
if self.config.pad_token_id is None and batch_size != 1:
raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
if self.config.pad_token_id is None:
last_non_pad_token = -1
elif input_ids is not None:
# To handle both left- and right- padding, we take the rightmost token that is not equal to pad_token_id
non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
token_indices = torch.arange(input_ids.shape[-1], device=logits.device)
last_non_pad_token = (token_indices * non_pad_mask).argmax(-1)
sequence_lengths = -1
else:
last_non_pad_token = -1
logger.warning_once(
f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
)
if input_ids is not None:
# if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
sequence_lengths = sequence_lengths % input_ids.shape[-1]
sequence_lengths = sequence_lengths.to(logits.device)
else:
sequence_lengths = -1
pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]
pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
loss = None
if labels is not None:

View File

@ -19,7 +19,6 @@ from ...utils import (
add_start_docstrings_to_model_forward,
replace_return_docstrings,
)
from ...utils.deprecation import deprecate_kwarg
from ..auto import AutoModel, AutoModelForCausalLM
from .configuration_new_task_model import NewTaskModelConfig
@ -255,7 +254,8 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
token_type_ids,
past_key_values,
cache_position,
input_tensor,
input_ids=None,
inputs_embeds=None,
is_training: bool = False,
):
if self.config.text_config._attn_implementation == "flash_attention_2":
@ -265,7 +265,8 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
using_static_cache = isinstance(past_key_values, StaticCache)
min_dtype = torch.finfo(self.dtype).min
inputs_lead_dim, sequence_length = input_tensor.shape[:2]
inputs_lead_dim = input_ids.shape[0] if input_ids is not None else inputs_embeds.shape[0]
sequence_length = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
if using_static_cache:
target_length = past_key_values.get_max_cache_shape()
elif isinstance(past_key_values, HybridCache):
@ -296,20 +297,16 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
if attention_mask is not None:
causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
mask_length = attention_mask.shape[-1]
# First unmask prefix tokens during training
if is_training:
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
token_type_ids[:, None, None, :].to(causal_mask.device) == 0, 0
)
# Then apply padding mask (will mask pad tokens)
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(causal_mask.device)
padding_mask = padding_mask == 0
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
padding_mask, min_dtype
)
# we are training thus we need to create a full mask on the image + prefix but causal on suffix
if is_training:
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
token_type_ids[:, None, None, :].to(causal_mask.device) == 0, 0
)
return causal_mask
def get_image_features(self, pixel_values: torch.FloatTensor):
@ -328,7 +325,6 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
image_features = image_features / (self.config.text_config.hidden_size**0.5)
return image_features
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
@add_start_docstrings_to_model_forward(NEW_TASK_MODEL_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=NewTaskModelCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
def forward(
@ -349,17 +345,16 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
num_logits_to_keep: int = 0,
) -> Union[Tuple, NewTaskModelCausalLMOutputWithPast]:
r"""
Args:
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
config.text_config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
(masked), the loss is only computed for the tokens with labels in `[0, ..., config.text_config.vocab_size]`.
logits_to_keep (`int` or `torch.Tensor`, *optional*):
If an `int`, compute logits for the last `logits_to_keep` tokens. If `0`, calculate logits for all
num_logits_to_keep (`int`, *optional*):
Calculate logits for the last `num_logits_to_keep` tokens. If `0`, calculate logits for all
`input_ids` (special case). Only last token logits are needed for generation, and calculating them only for that
token can save memory, which becomes pretty significant for long sequences or large vocabulary size.
If a `torch.Tensor`, must be 1D corresponding to the indices to keep in the sequence length dimension.
This is useful when using packed tensor format (single dimension for batch and sequence length).
Returns:
@ -423,7 +418,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
attention_mask=None,
token_type_ids=None,
use_cache=True,
logits_to_keep=None,
num_logits_to_keep=None,
labels=None,
**kwargs,
):
@ -436,7 +431,7 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
position_ids=position_ids,
cache_position=cache_position,
use_cache=use_cache,
logits_to_keep=logits_to_keep,
num_logits_to_keep=num_logits_to_keep,
token_type_ids=token_type_ids,
**kwargs,
)
@ -450,12 +445,10 @@ class NewTaskModelForNewTask(NewTaskModelPreTrainedModel, GenerationMixin):
model_inputs["pixel_values"] = pixel_values
is_training = token_type_ids is not None and labels is not None
if cache_position[0] == 0 and isinstance(past_key_values, HybridCache):
input_tensor = inputs_embeds if inputs_embeds is not None else input_ids
causal_mask = self._update_causal_mask(
attention_mask, token_type_ids, past_key_values, cache_position, input_tensor, is_training
attention_mask, token_type_ids, past_key_values, cache_position, input_ids, inputs_embeds, is_training
)
model_inputs["attention_mask"] = causal_mask
return model_inputs
def resize_token_embeddings(

View File

@ -356,7 +356,6 @@ class SuperPreTrainedModel(PreTrainedModel):
_supports_cache_class = True
_supports_quantized_cache = True
_supports_static_cache = True
_supports_attention_backend = True
def _init_weights(self, module):
std = self.config.initializer_range
@ -621,9 +620,7 @@ class SuperModel(SuperPreTrainedModel):
if attention_mask is not None:
causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
mask_length = attention_mask.shape[-1]
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
causal_mask.device
)
padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
padding_mask = padding_mask == 0
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
padding_mask, min_dtype

View File

@ -45,7 +45,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")

View File

@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

View File

@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)

View File

@ -43,7 +43,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

View File

@ -48,7 +48,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

View File

@ -53,7 +53,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")

View File

@ -46,7 +46,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")

View File

@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")

View File

@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)

View File

@ -58,7 +58,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)

View File

@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

View File

@ -46,7 +46,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = logging.getLogger(__name__)

View File

@ -54,7 +54,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)
# You should update this to your particular problem to have better documentation of `model_type`

View File

@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/object-detection/requirements.txt")

View File

@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logging.basicConfig(level=logging.INFO)
logger = get_logger(__name__)

View File

@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -46,7 +46,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

View File

@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")

View File

@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)

View File

@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

View File

@ -53,7 +53,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

View File

@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

View File

@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

View File

@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

View File

@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

View File

@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

View File

@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)

View File

@ -48,7 +48,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")

View File

@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

View File

@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

View File

@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

View File

@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = get_logger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

View File

@ -1,78 +0,0 @@
import json
from typing import Any, Dict
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.quantizers import HfQuantizer, register_quantization_config, register_quantizer
from transformers.utils.quantization_config import QuantizationConfigMixin
@register_quantization_config("custom")
class CustomConfig(QuantizationConfigMixin):
def __init__(self):
self.quant_method = "custom"
self.bits = 8
def to_dict(self) -> Dict[str, Any]:
output = {
"num_bits": self.bits,
}
return output
def __repr__(self):
config_dict = self.to_dict()
return f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True)}\n"
def to_diff_dict(self) -> Dict[str, Any]:
config_dict = self.to_dict()
default_config_dict = CustomConfig().to_dict()
serializable_config_dict = {}
for key, value in config_dict.items():
if value != default_config_dict[key]:
serializable_config_dict[key] = value
return serializable_config_dict
@register_quantizer("custom")
class CustomQuantizer(HfQuantizer):
def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
super().__init__(quantization_config, **kwargs)
self.quantization_config = quantization_config
self.scale_map = {}
self.device = kwargs.get("device", "cuda" if torch.cuda.is_available() else "cpu")
self.torch_dtype = kwargs.get("torch_dtype", torch.float32)
def _process_model_before_weight_loading(self, model, **kwargs):
return True
def _process_model_after_weight_loading(self, model, **kwargs):
return True
def is_serializable(self) -> bool:
return True
def is_trainable(self) -> bool:
return False
model_8bit = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m", quantization_config=CustomConfig(), torch_dtype="auto"
)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
input_text = "once there is"
inputs = tokenizer(input_text, return_tensors="pt")
output = model_8bit.generate(
**inputs,
max_length=100,
num_return_sequences=1,
no_repeat_ngram_size=2,
)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

View File

@ -1,257 +0,0 @@
import json
from typing import Any, Dict, List, Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from accelerate import init_empty_weights
from huggingface_hub import HfApi
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.quantizers import HfQuantizer, get_module_from_name, register_quantization_config, register_quantizer
from transformers.utils.quantization_config import QuantizationConfigMixin
# Implement INT8 Symmetric Linear layer
class Int8SymmetricLinear(torch.nn.Module):
def __init__(self, in_features, out_features, bias, dtype=torch.float32):
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.register_buffer("weight", torch.zeros((out_features, in_features), dtype=torch.int8))
self.register_buffer("weight_scale", torch.zeros((out_features, 1), dtype=dtype))
if bias:
self.register_buffer("bias", torch.zeros((self.out_features), dtype=dtype))
else:
self.bias = None
def forward(self, x):
dequant_weight = self.weight * self.weight_scale
output = F.linear(x, dequant_weight)
if self.bias is not None:
output = output + self.bias
return output
# Function to replace standard linear layers with INT8 symmetric quantized layers
def _replace_with_int8_symmetric_linear(
model,
modules_to_not_convert=None,
current_key_name=None,
quantization_config=None,
has_been_replaced=False,
pre_quantized=False,
):
"""
Recursively replaces nn.Linear modules with Int8SymmetricLinear modules.
"""
if current_key_name is None:
current_key_name = []
for name, module in model.named_children():
current_key_name.append(name)
if (isinstance(module, nn.Linear)) and name not in modules_to_not_convert:
# Check if the current key is not in the `modules_to_not_convert`
current_key_name_str = ".".join(current_key_name)
if not any(
(key + "." in current_key_name_str) or (key == current_key_name_str) for key in modules_to_not_convert
):
with init_empty_weights(include_buffers=True):
in_features = module.in_features
out_features = module.out_features
model._modules[name] = Int8SymmetricLinear(
in_features, out_features, module.bias is not None, dtype=module.weight.dtype
)
has_been_replaced = True
model._modules[name].requires_grad_(False)
if len(list(module.children())) > 0:
_, has_been_replaced = _replace_with_int8_symmetric_linear(
module,
modules_to_not_convert,
current_key_name,
quantization_config,
has_been_replaced=has_been_replaced,
pre_quantized=pre_quantized,
)
# Remove the last key for recursion
current_key_name.pop(-1)
return model, has_been_replaced
def replace_with_int8_symmetric_linear(
model, modules_to_not_convert=None, current_key_name=None, quantization_config=None, pre_quantized=False
):
"""
Main function to replace model layers with INT8 symmetric quantized versions.
"""
modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
if quantization_config.modules_to_not_convert is not None:
modules_to_not_convert.extend(quantization_config.modules_to_not_convert)
modules_to_not_convert = list(set(modules_to_not_convert))
model, has_been_replaced = _replace_with_int8_symmetric_linear(
model, modules_to_not_convert, current_key_name, quantization_config, pre_quantized=pre_quantized
)
if not has_been_replaced:
raise ValueError(
"You are loading your model using INT8 symmetric quantization but no linear modules were found in your model."
)
return model
@register_quantization_config("int8_symmetric")
class Int8SymmetricConfig(QuantizationConfigMixin):
"""
Configuration for INT8 symmetric quantization.
"""
def __init__(self, modules_to_not_convert: Optional[List[str]] = None, **kwargs):
self.quant_method = "int8_symmetric"
self.modules_to_not_convert = modules_to_not_convert
def __repr__(self):
config_dict = self.to_dict()
return f"{self.__class__.__name__} {json.dumps(config_dict, indent=2, sort_keys=True)}\n"
def to_diff_dict(self) -> Dict[str, Any]:
config_dict = self.to_dict()
default_config_dict = Int8SymmetricConfig().to_dict()
serializable_config_dict = {}
for key, value in config_dict.items():
if value != default_config_dict[key]:
serializable_config_dict[key] = value
return serializable_config_dict
@register_quantizer("int8_symmetric")
class Int8SymmetricQuantizer(HfQuantizer):
"""
Implementation of INT8 symmetric quantization.
"""
requires_calibration = False
requires_parameters_quantization = True
def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
super().__init__(quantization_config, **kwargs)
self.quantization_config = quantization_config
def _process_model_before_weight_loading(self, model, **kwargs):
"""
Replace model's linear layers with quantized versions before loading weights.
"""
self.modules_to_not_convert = self.quantization_config.modules_to_not_convert
model = replace_with_int8_symmetric_linear(
model,
modules_to_not_convert=self.modules_to_not_convert,
quantization_config=self.quantization_config,
pre_quantized=self.pre_quantized,
)
def check_quantized_param(
self,
model,
param_value: "torch.Tensor",
param_name: str,
state_dict: Dict[str, Any],
**kwargs,
):
module, tensor_name = get_module_from_name(model, param_name)
if isinstance(module, Int8SymmetricLinear):
if self.pre_quantized or tensor_name == "bias":
if tensor_name == "weight" and param_value.dtype != torch.int8:
raise ValueError("Expect quantized weights but got an unquantized weight")
return False
else:
if tensor_name == "weight_scale":
raise ValueError("Expect unquantized weights but got a quantized weight_scale")
return True
return False
def create_quantized_param(
self,
model,
param_value: "torch.Tensor",
param_name: str,
target_device: "torch.device",
state_dict: Dict[str, Any],
unexpected_keys: Optional[List[str]] = None,
):
"""
Quantizes weights to INT8 symmetric format.
"""
abs_max_per_row = torch.max(torch.abs(param_value), dim=1, keepdim=True)[0].clamp(min=1e-5)
weight_scale = abs_max_per_row / 127.0
weight_quantized = torch.round(param_value / weight_scale).clamp(-128, 127).to(torch.int8)
module, tensor_name = get_module_from_name(model, param_name)
module._buffers[tensor_name] = weight_quantized.to(target_device)
module._buffers["weight_scale"] = weight_scale.to(target_device)
def update_missing_keys(self, model, missing_keys: List[str], prefix: str) -> List[str]:
not_missing_keys = []
for name, module in model.named_modules():
if isinstance(module, Int8SymmetricLinear):
for missing in missing_keys:
if (
(name in missing or name in f"{prefix}.{missing}")
and not missing.endswith(".weight")
and not missing.endswith(".bias")
):
not_missing_keys.append(missing)
return [k for k in missing_keys if k not in not_missing_keys]
def _process_model_after_weight_loading(self, model, **kwargs):
"""
Post-processing after weights are loaded.
"""
return True
def is_serializable(self, safe_serialization=None):
return True
@property
def is_trainable(self) -> bool:
return False
# Example usage
if __name__ == "__main__":
model_int8 = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3.2-1B", quantization_config=Int8SymmetricConfig(), torch_dtype=torch.float, device_map="cpu"
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
input_text = "once there is"
inputs = tokenizer(input_text, return_tensors="pt").to("cpu")
output = model_int8.generate(
**inputs,
max_length=100,
num_return_sequences=1,
no_repeat_ngram_size=2,
)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)
# Save and upload to HUB
output_model_dir = "Llama-3.2-1B-INT8-CUSTOM"
model_int8.save_pretrained(output_model_dir)
tokenizer.save_pretrained(output_model_dir)
api = HfApi()
repo_id = "medmekk/Llama-3.2-1B-INT8-CUSTOM"
api.create_repo(repo_id, private=False)
api.upload_folder(folder_path=output_model_dir, repo_id=repo_id, repo_type="model")

View File

@ -1,5 +1,5 @@
datasets==2.3.2
transformers==4.48.0
transformers==4.38.0
wandb==0.13.1
evaluate==0.2.2
scikit-learn==1.5.0

View File

@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version(
"datasets>=1.8.0", "To fix: pip install -r examples/tensorflow/contrastive-image-text/requirements.txt"

View File

@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")

View File

@ -50,7 +50,7 @@ from transformers.utils import check_min_version, send_example_telemetry
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.50.0.dev0")
check_min_version("4.49.0.dev0")
logger = logging.getLogger(__name__)

Some files were not shown because too many files have changed in this diff Show More