Mirror of https://github.com/huggingface/transformers.git, synced 2025-10-21 01:23:56 +08:00

Compare commits: fix_nightl...v4.56.2 (62 commits)
Commit SHA1:
cd74917ffc 29261df824 694410d3b2 240ebfe57e a55e503138 91393fe4cc
3ce5629f1c 26a7e6d76e d56f9162e7 e62b9aae85 c58d7d7a52 ad6b8982a4
e7d351ceba 1067577ad2 7efb4c87ca 828a27fd32 74a24217f5 ffdd10fced
f0e778112f f68eb5f135 d888bd435d 11a6b95553 b07144ac27 008c0ba8e2
89ef1b6e0b 2e0f1d6a37 68013c505a ffcb344612 8c7f685079 d61fab1549
31336ab750 851b8f281d de9e2d7a2e 7e1aee4db6 893d89e5e6 becab2c601
8acbbdcadf 2300be3b41 b2b654afbf 476cd7bab1 1499f9e356 10ddfb0be5
d10603f701 f9b9a5e884 b824f4986f c9ff166718 721d4aee81 98289c5546
e3d8fd730e 821384d5d4 304225aa15 3c343c6601 6350636964 52aaa3f500
ed5dd2999c 8b804311ba a3afebbbbe 75d6f17de6 80f4c0c6a0 ff8b88a948
74ad608a2b c8c7623f20
.github/workflows/collated-reports.yml (vendored, 6 changed lines)
@@ -41,9 +41,3 @@ jobs:
          --job ${{ inputs.job }} \
          --report-repo-id ${{ inputs.report_repo_id }} \
          --gpu-name ${{ inputs.gpu_name }}

      - name: Upload collated reports
        uses: actions/upload-artifact@v4
        with:
          name: collated_reports_${{ env.CI_SHA }}.json
          path: collated_reports_${{ env.CI_SHA }}.json
.github/workflows/push-important-models.yml (vendored, 250 changed lines)
@ -4,17 +4,6 @@ on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
|
||||
env:
|
||||
OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
|
||||
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||
HF_HOME: /mnt/cache
|
||||
TRANSFORMERS_IS_CI: yes
|
||||
OMP_NUM_THREADS: 8
|
||||
MKL_NUM_THREADS: 8
|
||||
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
|
||||
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
|
||||
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||
|
||||
jobs:
|
||||
get_modified_models:
|
||||
name: "Get all modified files"
|
||||
@ -25,111 +14,144 @@ jobs:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
|
||||
- name: Get changed files using `actions/github-script`
|
||||
id: get-changed-files
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
files: src/transformers/models/**
|
||||
script: |
|
||||
let files = [];
|
||||
|
||||
// Only handle push events
|
||||
if (context.eventName === 'push') {
|
||||
const afterSha = context.payload.after;
|
||||
const branchName = context.payload.ref.replace('refs/heads/', '');
|
||||
|
||||
let baseSha;
|
||||
|
||||
if (branchName === 'main') {
|
||||
console.log('Push to main branch, comparing to parent commit');
|
||||
// Get the parent commit of the pushed commit
|
||||
const { data: commit } = await github.rest.repos.getCommit({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
ref: afterSha
|
||||
});
|
||||
baseSha = commit.parents[0]?.sha;
|
||||
if (!baseSha) {
|
||||
throw new Error('No parent commit found for the pushed commit');
|
||||
}
|
||||
} else {
|
||||
console.log(`Push to branch ${branchName}, comparing to main`);
|
||||
baseSha = 'main';
|
||||
}
|
||||
|
||||
const { data: comparison } = await github.rest.repos.compareCommits({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
base: baseSha,
|
||||
head: afterSha
|
||||
});
|
||||
|
||||
// Include added, modified, and renamed files
|
||||
files = comparison.files
|
||||
.filter(file => file.status === 'added' || file.status === 'modified' || file.status === 'renamed')
|
||||
.map(file => file.filename);
|
||||
}
|
||||
|
||||
// Include all files under src/transformers/ (not just models subdirectory)
|
||||
const filteredFiles = files.filter(file =>
|
||||
file.startsWith('src/transformers/')
|
||||
);
|
||||
|
||||
core.setOutput('changed_files', filteredFiles.join(' '));
|
||||
core.setOutput('any_changed', filteredFiles.length > 0 ? 'true' : 'false');
|
||||
|
||||
- name: Run step if only the files listed above change
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
id: set-matrix
|
||||
- name: Parse changed files with Python
|
||||
if: steps.get-changed-files.outputs.any_changed == 'true'
|
||||
env:
|
||||
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
|
||||
CHANGED_FILES: ${{ steps.get-changed-files.outputs.changed_files }}
|
||||
id: set-matrix
|
||||
run: |
|
||||
model_arrays=()
|
||||
for file in $ALL_CHANGED_FILES; do
|
||||
model_path="${file#*models/}"
|
||||
model_path="models/${model_path%%/*}"
|
||||
if grep -qFx "$model_path" utils/important_models.txt; then
|
||||
# Append the file to the matrix string
|
||||
model_arrays+=("$model_path")
|
||||
fi
|
||||
done
|
||||
matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//')
|
||||
echo "matrix=[$matrix_string]" >> $GITHUB_OUTPUT
|
||||
test_modified_files:
|
||||
python3 - << 'EOF'
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Add the utils directory to Python path
|
||||
sys.path.insert(0, 'utils')
|
||||
|
||||
# Import the important models list
|
||||
from important_files import IMPORTANT_MODELS
|
||||
|
||||
print(f"Important models: {IMPORTANT_MODELS}")
|
||||
|
||||
# Get the changed files from the previous step
|
||||
changed_files_str = os.environ.get('CHANGED_FILES', '')
|
||||
changed_files = changed_files_str.split() if changed_files_str else []
|
||||
|
||||
# Filter to only Python files
|
||||
python_files = [f for f in changed_files if f.endswith('.py')]
|
||||
print(f"Python files changed: {python_files}")
|
||||
|
||||
result_models = set()
|
||||
|
||||
# Specific files that trigger all models
|
||||
transformers_utils_files = [
|
||||
'modeling_utils.py',
|
||||
'modeling_rope_utils.py',
|
||||
'modeling_flash_attention_utils.py',
|
||||
'modeling_attn_mask_utils.py',
|
||||
'cache_utils.py',
|
||||
'masking_utils.py',
|
||||
'pytorch_utils.py'
|
||||
]
|
||||
|
||||
# Single loop through all Python files
|
||||
for file in python_files:
|
||||
# Check for files under src/transformers/models/
|
||||
if file.startswith('src/transformers/models/'):
|
||||
remaining_path = file[len('src/transformers/models/'):]
|
||||
if '/' in remaining_path:
|
||||
model_dir = remaining_path.split('/')[0]
|
||||
if model_dir in IMPORTANT_MODELS:
|
||||
result_models.add(model_dir)
|
||||
print(f"Added model directory: {model_dir}")
|
||||
|
||||
# Check for specific files under src/transformers/ or src/transformers/generation/ files
|
||||
elif file.startswith('src/transformers/generation/') or \
|
||||
(file.startswith('src/transformers/') and os.path.basename(file) in transformers_utils_files):
|
||||
print(f"Found core file: {file} - including all important models")
|
||||
result_models.update(IMPORTANT_MODELS)
|
||||
break # No need to continue once we include all models
|
||||
|
||||
# Convert to sorted list and create matrix
|
||||
result_list = sorted(list(result_models))
|
||||
print(f"Final model list: {result_list}")
|
||||
|
||||
if result_list:
|
||||
matrix_json = json.dumps(result_list)
|
||||
print(f"matrix={matrix_json}")
|
||||
|
||||
# Write to GITHUB_OUTPUT
|
||||
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
|
||||
f.write(f"matrix={matrix_json}\n")
|
||||
else:
|
||||
print("matrix=[]")
|
||||
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
|
||||
f.write("matrix=[]\n")
|
||||
EOF
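For illustration, here is a minimal, self-contained sketch of the filtering logic the script above applies, using a made-up `IMPORTANT_MODELS` list and hypothetical file paths (the real list is imported from `utils/important_files.py`):

```py
import json

IMPORTANT_MODELS = ["llama", "gemma3", "qwen2"]  # hypothetical stand-in for utils/important_files.py

changed_files = [
    "src/transformers/models/llama/modeling_llama.py",  # an important model file
    "src/transformers/models/bert/modeling_bert.py",    # not in the important list
]

result_models = set()
for file in changed_files:
    if file.startswith("src/transformers/models/"):
        model_dir = file[len("src/transformers/models/"):].split("/")[0]
        if model_dir in IMPORTANT_MODELS:
            result_models.add(model_dir)
    elif file.startswith("src/transformers/generation/"):
        # A core file pulls in every important model.
        result_models.update(IMPORTANT_MODELS)
        break

print(f"matrix={json.dumps(sorted(result_models))}")  # matrix=["llama"]
```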
|
||||
|
||||
model-ci:
|
||||
name: Model CI
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
needs: get_modified_models
|
||||
name: Slow & FA2 tests
|
||||
runs-on:
|
||||
group: aws-g5-4xlarge-cache
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-gpu
|
||||
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install locally transformers & other libs
|
||||
run: |
|
||||
apt install sudo
|
||||
sudo -H pip install --upgrade pip
|
||||
sudo -H pip uninstall -y transformers
|
||||
sudo -H pip install -U -e ".[testing]"
|
||||
MAX_JOBS=4 pip install flash-attn --no-build-isolation
|
||||
pip install bitsandbytes
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
run: pip freeze
|
||||
|
||||
- name: Run FA2 tests
|
||||
id: run_fa2_tests
|
||||
run:
|
||||
pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.model-name }}_fa2_tests
|
||||
path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
|
||||
title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
|
||||
status: ${{ steps.run_fa2_tests.conclusion}}
|
||||
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
||||
- name: Run integration tests
|
||||
id: run_integration_tests
|
||||
if: always()
|
||||
run:
|
||||
pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
|
||||
|
||||
- name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tests_integration_${{ matrix.model-name }}
|
||||
path: /transformers/reports/tests_integration_${{ matrix.model-name }}
|
||||
|
||||
- name: Post to Slack
|
||||
if: always()
|
||||
uses: huggingface/hf-workflows/.github/actions/post-slack@main
|
||||
with:
|
||||
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
|
||||
title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
|
||||
status: ${{ steps.run_integration_tests.conclusion}}
|
||||
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
|
||||
- name: Tailscale # In order to be able to SSH when a test fails
|
||||
if: ${{ runner.debug == '1'}}
|
||||
uses: huggingface/tailscale-action@v1
|
||||
with:
|
||||
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
|
||||
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
|
||||
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
|
||||
waitForSSH: true
|
||||
if: needs.get_modified_models.outputs.matrix != '' && needs.get_modified_models.outputs.matrix != '[]'
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-push"
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: push
|
||||
report_repo_id: hf-internal-testing/transformers_ci_push
|
||||
commit_sha: ${{ github.sha }}
|
||||
models: ${{ needs.get_modified_models.outputs.matrix }}
|
||||
secrets: inherit
|
||||
|
.github/workflows/self-nightly-caller.yml (vendored, 22 changed lines)
@@ -12,12 +12,34 @@ on:
    branches:
      - run_ci_with_nightly_torch*

# Used for `push` to easily modify the target workflow runs to compare against
env:
  prev_workflow_run_id: ""
  other_workflow_run_id: ""


jobs:
  build_nightly_torch_ci_images:
    name: Build CI Docker Images with nightly torch
    uses: ./.github/workflows/build-nightly-ci-docker-images.yml
    secrets: inherit

  setup:
    name: Setup
    runs-on: ubuntu-22.04
    steps:
      - name: Setup
        run: |
          mkdir "setup_values"
          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: setup_values
          path: setup_values

  model-ci:
    name: Model CI
    needs: build_nightly_torch_ci_images
.github/workflows/self-scheduled.yml (vendored, 11 changed lines)
@@ -31,7 +31,10 @@ on:
      commit_sha:
        required: false
        type: string

      models:
        default: ""
        required: false
        type: string

env:
  HF_HOME: /mnt/cache
@@ -68,7 +71,7 @@ jobs:
      - name: Update clone
        working-directory: /transformers
        run: |
          git fetch && git checkout ${{ github.sha }}
          git fetch && git checkout ${{ inputs.commit_sha || github.sha }}

      - name: Cleanup
        working-directory: /transformers
@@ -87,7 +90,7 @@ jobs:
        working-directory: /transformers/tests
        run: |
          if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
            echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
            echo "folder_slices=$(python3 ../utils/split_model_tests.py --models '${{ inputs.models }}' --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
            echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
            echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
          elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
@@ -512,7 +515,7 @@ jobs:
      run_quantization_torch_gpu,
      run_extract_warnings
    ]
    if: ${{ always() }}
    if: always() && !cancelled()
    uses: ./.github/workflows/slack-report.yml
    with:
      job: ${{ inputs.job }}
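As a rough illustration of what the `folder_slices` and `slice_ids` outputs look like, the sketch below splits a made-up list of model test folders into `NUM_SLICES` chunks; the actual slicing strategy is implemented in `utils/split_model_tests.py` and may differ in detail:

```py
# Hypothetical inputs; the real folder list comes from the transformers test suite.
folders = ["models/llama", "models/gemma3", "models/qwen2", "models/t5"]
NUM_SLICES = 2

folder_slices = [folders[i::NUM_SLICES] for i in range(NUM_SLICES)]
slice_ids = list(range(NUM_SLICES))

print(f"folder_slices={folder_slices}")  # [['models/llama', 'models/qwen2'], ['models/gemma3', 'models/t5']]
print(f"slice_ids={slice_ids}")          # [0, 1]
```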
.github/workflows/slack-report.yml (vendored, 6 changed lines)
@@ -36,7 +36,7 @@ jobs:
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    if: always() && !cancelled()
    steps:
      - name: Preliminary job status
        shell: bash
@@ -75,6 +75,8 @@ jobs:
          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: ${{ inputs.ci_event }}
          # This `CI_TITLE` would be empty for `schedule` or `workflow_run` events.
          CI_TITLE: ${{ github.event.head_commit.message }}
          CI_SHA: ${{ inputs.commit_sha || github.sha }}
          CI_TEST_JOB: ${{ inputs.job }}
          SETUP_STATUS: ${{ inputs.setup_status }}
@@ -91,7 +93,7 @@ jobs:
            python utils/notification_service.py "${{ inputs.quantization_matrix }}"
          else
            python utils/notification_service.py "${{ inputs.folder_slices }}"
          fi
          fi

      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
      - name: Failure table artifacts
@@ -32,7 +32,10 @@ RUN python3 -m pip uninstall -y flax jax

RUN python3 -m pip install --no-cache-dir -U timm

RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git || echo "Don't install detectron2 with nightly torch"

RUN python3 -m pip install --no-cache-dir pytesseract

RUN python3 -m pip install -U "itsdangerous<2.1.0"

RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
@@ -41,6 +44,8 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef

# For bettertransformer
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
# For kernels
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels

# For video model testing
RUN python3 -m pip install --no-cache-dir av
@@ -52,7 +57,7 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
RUN python3 -m pip install --no-cache-dir quanto

# After using A10 as CI runner, let's run FA2 tests
RUN python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"

# TODO (ydshieh): check this again
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests
@@ -17,6 +17,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    jupyter \
    tensorflow \
    torch
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels

RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \
@@ -373,6 +373,8 @@
  - sections:
    - local: model_doc/albert
      title: ALBERT
    - local: model_doc/apertus
      title: Apertus
    - local: model_doc/arcee
      title: Arcee
    - local: model_doc/bamba
@@ -15,6 +15,7 @@ rendered properly in your Markdown viewer.
-->

# Caching

Imagine you're having a conversation with someone, and instead of remembering what they previously said, they have to start from scratch every time you respond. This would be slow and inefficient, right?

You can extend this analogy to transformer models. Autoregressive model generation can be slow because it makes a prediction one token at a time. Each new prediction is dependent on all the previous context.
@@ -107,7 +108,7 @@ model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

past_key_values = DynamicCache()
past_key_values = DynamicCache(config=model.config)
messages = [{"role": "user", "content": "Hello, what's your name."}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)

@@ -138,7 +139,7 @@ The cache position tracks where to insert new tokens in the attention cache. It
Cache position is used internally for two purposes:

1. Selecting new tokens to process in the input sequence and ensuring only tokens that haven’t been cached yet are passed to the model's `forward`.
2. Storing key/value pairs at the correct positions in the cache. This is especially important for fixed-size caches, like [`StaticCache`], that pre-allocates a specific cache length.
2. Storing key/value pairs at the correct positions in the cache. This is especially important for fixed-size caches that pre-allocate a specific cache length.

The generation loop usually takes care of the cache position, but if you're writing a custom generation method, it is important that cache positions are accurate since they are used to write and read key/value states into fixed slots.
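To make that last point concrete, here is a minimal sketch of a hand-rolled greedy decode loop that tracks `cache_position` explicitly. It reuses the checkpoint from the example above and assumes a single unpadded sequence; a real generation method would also handle stopping criteria, sampling, and attention masks:

```py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache

model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("Hello, what's your name?", return_tensors="pt").to(model.device)
input_ids = inputs["input_ids"]

past_key_values = DynamicCache(config=model.config)
# Prefill: positions 0 .. seq_len - 1 are written into the cache.
cache_position = torch.arange(input_ids.shape[1], device=model.device)

generated = []
for _ in range(20):
    with torch.no_grad():
        outputs = model(input_ids, past_key_values=past_key_values, cache_position=cache_position, use_cache=True)
    next_token = outputs.logits[:, -1].argmax(dim=-1, keepdim=True)
    generated.append(next_token)
    # Only the new token is fed on the next step; its cache position is the next free slot.
    cache_position = cache_position[-1:] + 1
    input_ids = next_token

print(tokenizer.decode(torch.cat(generated, dim=-1)[0], skip_special_tokens=True))
```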
@@ -33,6 +33,7 @@ Add the `gguf_file` parameter to [`~PreTrainedModel.from_pretrained`] to specify

```py
# pip install gguf
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
@@ -227,7 +227,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)

user_prompts = ["Hello, what's your name?", "Btw, yesterday I was on a rock concert."]

past_key_values = DynamicCache()
past_key_values = DynamicCache(config=model.config)

messages = []
for prompt in user_prompts:
docs/source/en/model_doc/apertus.md (new file, 100 lines)
@ -0,0 +1,100 @@
|
||||
<!--Copyright 2025 The HuggingFace Team and the Swiss AI Initiative. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
|
||||
-->
|
||||
|
||||
<div style="float: right;">
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||
<img alt="FlashAttention" src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E%20FlashAttention-eae0c8?style=flat">
|
||||
<img alt="SDPA" src="https://img.shields.io/badge/SDPA-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||
<img alt="Tensor parallelism" src="https://img.shields.io/badge/Tensor%20parallelism-06b6d4?style=flat&logoColor=white">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
# Apertus
|
||||
|
||||
[Apertus](https://www.swiss-ai.org) is a family of large language models from the Swiss AI Initiative.
|
||||
|
||||
> [!TIP]
|
||||
> Coming soon
|
||||
|
||||
The example below demonstrates how to generate text with [`Pipeline`] or the [`AutoModel`], and from the command line.
|
||||
|
||||
<hfoptions id="usage">
|
||||
<hfoption id="Pipeline">
|
||||
|
||||
```py
|
||||
import torch
|
||||
from transformers import pipeline
|
||||
|
||||
pipeline = pipeline(
|
||||
task="text-generation",
|
||||
model="swiss-ai/Apertus-8B",
|
||||
dtype=torch.bfloat16,
|
||||
device=0
|
||||
)
|
||||
pipeline("Plants create energy through a process known as")
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="AutoModel">
|
||||
|
||||
```py
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
"swiss-ai/Apertus-8B",
|
||||
)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"swiss-ai/Apertus-8B",
|
||||
dtype=torch.bfloat16,
|
||||
device_map="auto",
|
||||
attn_implementation="sdpa"
|
||||
)
|
||||
input_ids = tokenizer("Plants create energy through a process known as", return_tensors="pt").to("cuda")
|
||||
|
||||
output = model.generate(**input_ids)
|
||||
print(tokenizer.decode(output[0], skip_special_tokens=True))
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="transformers CLI">
|
||||
|
||||
```bash
|
||||
echo -e "Plants create energy through a process known as" | transformers run --task text-generation --model swiss-ai/Apertus-8B --device 0
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
</hfoptions>
|
||||
|
||||
## ApertusConfig
|
||||
|
||||
[[autodoc]] ApertusConfig
|
||||
|
||||
## ApertusModel
|
||||
|
||||
[[autodoc]] ApertusModel
|
||||
- forward
|
||||
|
||||
## ApertusForCausalLM
|
||||
|
||||
[[autodoc]] ApertusForCausalLM
|
||||
- forward
|
||||
|
||||
## ApertusForTokenClassification
|
||||
|
||||
[[autodoc]] ApertusForTokenClassification
|
||||
- forward
|
@ -45,7 +45,7 @@ results = keypoint_matcher([url_0, url_1], threshold=0.9)
|
||||
print(results[0])
|
||||
# {'keypoint_image_0': {'x': ..., 'y': ...}, 'keypoint_image_1': {'x': ..., 'y': ...}, 'score': ...}
|
||||
```
|
||||
<hfoption id="AutoModel">
|
||||
</hfoption>
|
||||
<hfoption id="AutoModel">
|
||||
|
||||
```py
|
||||
@ -65,7 +65,7 @@ processor = AutoImageProcessor.from_pretrained("zju-community/efficientloftr")
|
||||
model = AutoModelForKeypointMatching.from_pretrained("zju-community/efficientloftr")
|
||||
|
||||
inputs = processor(images, return_tensors="pt")
|
||||
with torch.no_grad():
|
||||
with torch.inference_mode():
|
||||
outputs = model(**inputs)
|
||||
|
||||
# Post-process to get keypoints and matches
|
||||
@ -92,7 +92,8 @@ processed_outputs = processor.post_process_keypoint_matching(outputs, image_size
|
||||
# EfficientLoFTR requires pairs of images
|
||||
images = [image1, image2]
|
||||
inputs = processor(images, return_tensors="pt")
|
||||
outputs = model(**inputs)
|
||||
with torch.inference_mode():
|
||||
outputs = model(**inputs)
|
||||
|
||||
# Extract matching information
|
||||
keypoints = outputs.keypoints # Keypoints in both images
|
||||
|
@@ -150,7 +150,7 @@ visualizer("LLMs generate text through a process known as")
)
input_text = "LLMs generate text through a process known as"
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
past_key_values = DynamicCache()
past_key_values = DynamicCache(config=model.config)
outputs = model.generate(**input_ids, max_new_tokens=50, past_key_values=past_key_values)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
@ -9,7 +9,7 @@ Unless required by applicable law or agreed to in writing, software distributed
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
-->
|
||||
*This model was released on 2023-09-23 and added to Hugging Face Transformers on 2025-08-19.*
|
||||
*This model was released on {release_date} and added to Hugging Face Transformers on 2025-08-19.*
|
||||
|
||||
<div style="float: right;">
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
@ -48,14 +48,14 @@ import requests
|
||||
from PIL import Image, ImageDraw
|
||||
from transformers import AutoProcessor, Kosmos2_5ForConditionalGeneration, infer_device
|
||||
|
||||
repo = "ydshieh/kosmos-2.5"
|
||||
device = f"{infer_device()}:0"
|
||||
repo = "microsoft/kosmos-2.5"
|
||||
device = "cuda:0"
|
||||
dtype = torch.bfloat16
|
||||
model = Kosmos2_5ForConditionalGeneration.from_pretrained(repo, device_map=device, dtype=dtype)
|
||||
processor = AutoProcessor.from_pretrained(repo)
|
||||
|
||||
# sample image
|
||||
url = "https://huggingface.co/ydshieh/kosmos-2.5/resolve/main/receipt_00008.png"
|
||||
url = "https://huggingface.co/microsoft/kosmos-2.5/resolve/main/receipt_00008.png"
|
||||
image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
prompt = "<md>"
|
||||
@ -87,14 +87,14 @@ import requests
|
||||
from PIL import Image, ImageDraw
|
||||
from transformers import AutoProcessor, Kosmos2_5ForConditionalGeneration, infer_device
|
||||
|
||||
repo = "ydshieh/kosmos-2.5"
|
||||
device = f"{infer_device()}:0"
|
||||
repo = "microsoft/kosmos-2.5"
|
||||
device = "cuda:0"
|
||||
dtype = torch.bfloat16
|
||||
model = Kosmos2_5ForConditionalGeneration.from_pretrained(repo, device_map=device, dtype=dtype)
|
||||
processor = AutoProcessor.from_pretrained(repo)
|
||||
|
||||
# sample image
|
||||
url = "https://huggingface.co/ydshieh/kosmos-2.5/resolve/main/receipt_00008.png"
|
||||
url = "https://huggingface.co/microsoft/kosmos-2.5/resolve/main/receipt_00008.png"
|
||||
image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
# bs = 1
|
||||
@ -160,12 +160,52 @@ image.save("output.png")
|
||||
</hfoptions>
|
||||
|
||||
|
||||
## Example
|
||||
**Markdown Task:** For usage instructions, please refer to [md.py](https://huggingface.co/ydshieh/kosmos-2.5/blob/main/md.py).
|
||||
## Chat version
|
||||
|
||||
**OCR Task:** For usage instructions, please refer to [ocr.py](https://huggingface.co/ydshieh/kosmos-2.5/blob/main/ocr.py).
|
||||
The authors also released Kosmos-2.5 Chat, which is a chat version optimized for document understanding. You can use it like so:
|
||||
|
||||
```python
|
||||
import re
|
||||
import torch
|
||||
import requests
|
||||
from PIL import Image, ImageDraw
|
||||
from transformers import AutoProcessor, Kosmos2_5ForConditionalGeneration
|
||||
|
||||
repo = "microsoft/kosmos-2.5-chat"
|
||||
device = "cuda:0"
|
||||
dtype = torch.bfloat16
|
||||
|
||||
model = Kosmos2_5ForConditionalGeneration.from_pretrained(repo,
|
||||
device_map=device,
|
||||
torch_dtype=dtype,
|
||||
attn_implementation="flash_attention_2")
|
||||
processor = AutoProcessor.from_pretrained(repo)
|
||||
|
||||
# sample image
|
||||
url = "https://huggingface.co/microsoft/kosmos-2.5/resolve/main/receipt_00008.png"
|
||||
|
||||
image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
question = "What is the sub total of the receipt?"
|
||||
template = "<md>A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
|
||||
prompt = template.format(question)
|
||||
inputs = processor(text=prompt, images=image, return_tensors="pt")
|
||||
|
||||
height, width = inputs.pop("height"), inputs.pop("width")
|
||||
raw_width, raw_height = image.size
|
||||
scale_height = raw_height / height
|
||||
scale_width = raw_width / width
|
||||
|
||||
inputs = {k: v.to(device) if v is not None else None for k, v in inputs.items()}
|
||||
inputs["flattened_patches"] = inputs["flattened_patches"].to(dtype)
|
||||
generated_ids = model.generate(
|
||||
**inputs,
|
||||
max_new_tokens=1024,
|
||||
)
|
||||
|
||||
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
print(generated_text[0])
|
||||
```
|
||||
|
||||
## Kosmos2_5Config
|
||||
|
||||
|
@ -47,6 +47,8 @@ results = keypoint_matcher([url_0, url_1], threshold=0.9)
|
||||
print(results[0])
|
||||
# {'keypoint_image_0': {'x': ..., 'y': ...}, 'keypoint_image_1': {'x': ..., 'y': ...}, 'score': ...}
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="AutoModel">
|
||||
|
||||
```py
|
||||
@ -66,7 +68,7 @@ processor = AutoImageProcessor.from_pretrained("ETH-CVG/lightglue_superpoint")
|
||||
model = AutoModel.from_pretrained("ETH-CVG/lightglue_superpoint")
|
||||
|
||||
inputs = processor(images, return_tensors="pt")
|
||||
with torch.no_grad():
|
||||
with torch.inference_mode():
|
||||
outputs = model(**inputs)
|
||||
|
||||
# Post-process to get keypoints and matches
|
||||
@ -93,7 +95,8 @@ processed_outputs = processor.post_process_keypoint_matching(outputs, image_size
|
||||
# LightGlue requires pairs of images
|
||||
images = [image1, image2]
|
||||
inputs = processor(images, return_tensors="pt")
|
||||
outputs = model(**inputs)
|
||||
with torch.inference_mode():
|
||||
outputs = model(**inputs)
|
||||
|
||||
# Extract matching information
|
||||
keypoints0 = outputs.keypoints0 # Keypoints in first image
|
||||
|
@@ -25,7 +25,7 @@ rendered properly in your Markdown viewer.

# OPT

[OPT](https://huggingface.co/papers/2205.01068) is a suite of open-source decoder-only pre-trained transformers whose parameters range from 125M to 175B. OPT models are designed for casual language modeling and aim to enable responsible and reproducible research at scale. OPT-175B is comparable in performance to GPT-3 with only 1/7th the carbon footprint.
[OPT](https://huggingface.co/papers/2205.01068) is a suite of open-source decoder-only pre-trained transformers whose parameters range from 125M to 175B. OPT models are designed for causal language modeling and aim to enable responsible and reproducible research at scale. OPT-175B is comparable in performance to GPT-3 with only 1/7th the carbon footprint.

You can find all the original OPT checkpoints under the [OPT](https://huggingface.co/collections/facebook/opt-66ed00e15599f02966818844) collection.
@ -15,74 +15,126 @@ rendered properly in your Markdown viewer.
|
||||
-->
|
||||
*This model was released on 2024-09-17 and added to Hugging Face Transformers on 2024-09-14.*
|
||||
|
||||
# Pixtral
|
||||
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||
<div style="float: right;">
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
## Overview
|
||||
# Pixtral
|
||||
|
||||
The [Pixtral](https://huggingface.co/papers/2410.07073) model was released by the Mistral AI team in a [blog post](https://mistral.ai/news/pixtral-12b/). Pixtral is a multimodal version of [Mistral](mistral), incorporating a 400 million parameter vision encoder trained from scratch.
|
||||
|
||||
The intro from the blog says the following:
|
||||
|
||||
*Pixtral is trained to understand both natural images and documents, achieving 52.5% on the MMMU reasoning benchmark, surpassing a number of larger models. The model shows strong abilities in tasks such as chart and figure understanding, document question answering, multimodal reasoning and instruction following. Pixtral is able to ingest images at their natural resolution and aspect ratio, giving the user flexibility on the number of tokens used to process an image. Pixtral is also able to process any number of images in its long context window of 128K tokens. Unlike previous open-source models, Pixtral does not compromise on text benchmark performance to excel in multimodal tasks.*
|
||||
[Pixtral](https://huggingface.co/papers/2410.07073) is a multimodal model trained to understand natural images and documents. It accepts images in their natural resolution and aspect ratio without resizing or padding due to its 2D RoPE embeddings. In addition, Pixtral has a long 128K token context window for processing a large number of images. Pixtral couples a 400M vision encoder with a 12B Mistral Nemo decoder.
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/pixtral_architecture.webp"
|
||||
alt="drawing" width="600"/>
|
||||
|
||||
<small> Pixtral architecture. Taken from the <a href="https://mistral.ai/news/pixtral-12b/">blog post.</a> </small>
|
||||
|
||||
Tips:
|
||||
You can find all the original Pixtral checkpoints under the [Mistral AI](https://huggingface.co/mistralai/models?search=pixtral) organization.
|
||||
|
||||
- Pixtral is a multimodal model, taking images and text as input, and producing text as output.
|
||||
- This model follows the [Llava](llava) architecture. The model uses [`PixtralVisionModel`] for its vision encoder, and [`MistralForCausalLM`] for its language decoder.
|
||||
- The main contribution is the 2d ROPE (rotary position embeddings) on the images, and support for arbitrary image sizes (the images are not padded together nor are they resized).
|
||||
- Similar to [Llava](llava), the model internally replaces the `[IMG]` token placeholders by image embeddings from the vision encoder. The format for one or multiple prompts is the following:
|
||||
```
|
||||
"<s>[INST][IMG]\nWhat are the things I should be cautious about when I visit this place?[/INST]"
|
||||
```
|
||||
Then, the processor will replace each `[IMG]` token with a number of `[IMG]` tokens that depend on the height and the width of each image. Each *row* of the image is separated by an `[IMG_BREAK]` token, and each image is separated by an `[IMG_END]` token. It's advised to use the `apply_chat_template` method of the processor, which takes care of all of this and formats the text for you. If you're using `transformers>=4.49.0`, you can also get a vectorized output from `apply_chat_template`. See the [usage section](#usage) for more info.
|
||||
> [!TIP]
|
||||
> This model was contributed by [amyeroberts](https://huggingface.co/amyeroberts) and [ArthurZ](https://huggingface.co/ArthurZ).
|
||||
> Click on the Pixtral models in the right sidebar for more examples of how to apply Pixtral to different vision and language tasks.
|
||||
|
||||
<hfoptions id="usage">
|
||||
|
||||
This model was contributed by [amyeroberts](https://huggingface.co/amyeroberts) and [ArthurZ](https://huggingface.co/ArthurZ). The original code can be found [here](https://github.com/vllm-project/vllm/pull/8377).
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
At inference time, it's advised to use the processor's `apply_chat_template` method, which correctly formats the prompt for the model:
|
||||
<hfoption id="AutoModel">
|
||||
|
||||
```python
|
||||
import torch
|
||||
from transformers import AutoProcessor, LlavaForConditionalGeneration
|
||||
|
||||
model_id = "mistral-community/pixtral-12b"
|
||||
model = LlavaForConditionalGeneration.from_pretrained(model_id, dtype="auto", device_map="auto")
|
||||
processor = AutoProcessor.from_pretrained(model_id)
|
||||
model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto")
|
||||
|
||||
url_dog = "https://picsum.photos/id/237/200/300"
|
||||
url_mountain = "https://picsum.photos/seed/picsum/200/300"
|
||||
|
||||
chat = [
|
||||
{
|
||||
"role": "user", "content": [
|
||||
{"type": "text", "content": "Can this animal"},
|
||||
{"type": "image", "url": "https://picsum.photos/id/237/200/300"},
|
||||
{"type": "image", "url": url_dog},
|
||||
{"type": "text", "content": "live here?"},
|
||||
{"type": "image", "url": "https://picsum.photos/seed/picsum/200/300"}
|
||||
{"type": "image", "url" : url_mountain}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
inputs = processor.apply_chat_template(
|
||||
chat,
|
||||
add_generation_prompt=True,
|
||||
tokenize=True,
|
||||
return_dict=True,
|
||||
return_tensors="pt"
|
||||
).to(model.device)
|
||||
|
||||
inputs = processor.apply_chat_template(chat, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt").to(model.device)
|
||||
generate_ids = model.generate(**inputs, max_new_tokens=500)
|
||||
output = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
|
||||
</hfoptions>
|
||||
|
||||
Quantization reduces the memory burden of large models by representing the weights in a lower precision. Refer to the [Quantization](../quantization/overview) overview for more available quantization backends.
|
||||
|
||||
The example below uses [bitsandbytes](../quantization/bitsandbytes) to quantize the model to 4-bits.
|
||||
|
||||
```python
|
||||
import torch
|
||||
import requests
|
||||
from PIL import Image
|
||||
from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig
|
||||
|
||||
model_id = "mistral-community/pixtral-12b"
|
||||
|
||||
quantization_config = BitsAndBytesConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16
|
||||
)
|
||||
|
||||
model = LlavaForConditionalGeneration.from_pretrained(
|
||||
model_id,
|
||||
quantization_config=quantization_config,
|
||||
device_map="auto"
|
||||
)
|
||||
processor = AutoProcessor.from_pretrained(model_id)
|
||||
|
||||
dog_url = "https://picsum.photos/id/237/200/300"
|
||||
mountain_url = "https://picsum.photos/seed/picsum/200/300"
|
||||
dog_image = Image.open(requests.get(dog_url, stream=True).raw)
|
||||
mountain_image = Image.open(requests.get(mountain_url, stream=True).raw)
|
||||
|
||||
chat = [
|
||||
{
|
||||
"role": "user", "content": [
|
||||
{"type": "text", "text": "Can this animal"},
|
||||
{"type": "image"},
|
||||
{"type": "text", "text": "live here?"},
|
||||
{"type": "image"}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
prompt = processor.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
|
||||
inputs = processor(text=prompt, images=[dog_image, mountain_image], return_tensors="pt")
|
||||
|
||||
inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)
|
||||
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
||||
|
||||
generate_ids = model.generate(**inputs, max_new_tokens=100)
|
||||
output = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
||||
print(output)
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- Pixtral uses [`PixtralVisionModel`] as the vision encoder and [`MistralForCausalLM`] for its language decoder.
|
||||
- The model internally replaces `[IMG]` token placeholders with image embeddings.
|
||||
|
||||
```py
|
||||
"<s>[INST][IMG]\nWhat are the things I should be cautious about when I visit this place?[/INST]"
|
||||
```
|
||||
|
||||
The `[IMG]` tokens are replaced with a number of `[IMG]` tokens that depend on the height and width of each image. Each row of the image is separated by a `[IMG_BREAK]` token and each image is separated by a `[IMG_END]` token. Use the [`~Processor.apply_chat_template`] method to handle these tokens for you.
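To see that expansion directly, a small sketch (reusing the checkpoint and a picsum image URL from the examples above; only the processor is loaded) decodes the processed input ids:

```py
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("mistral-community/pixtral-12b")

chat = [
    {
        "role": "user", "content": [
            {"type": "text", "content": "Describe this image."},
            {"type": "image", "url": "https://picsum.photos/id/237/200/300"},
        ]
    }
]

inputs = processor.apply_chat_template(chat, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt")
# The decoded prompt shows one [IMG] token per image patch, with [IMG_BREAK] between rows and [IMG_END] after each image.
print(processor.batch_decode(inputs["input_ids"])[0][:200])
```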
|
||||
|
||||
## PixtralVisionConfig
|
||||
|
||||
[[autodoc]] PixtralVisionConfig
|
||||
|
@ -1,113 +1,97 @@
|
||||
<!--Copyright 2021 The HuggingFace Team. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations under the License.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
||||
⚠️ Note that this file is in Markdown but contains specific syntax
|
||||
for our doc-builder (similar to MDX) that may not render properly
|
||||
in your Markdown viewer.
|
||||
-->
|
||||
*This model was released on 2021-05-31 and added to Hugging Face Transformers on 2021-10-28.*
|
||||
|
||||
# SegFormer
|
||||
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||
<div style="float: right;">
|
||||
<div class="flex flex-wrap space-x-1">
|
||||
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
## Overview
|
||||
# SegFormer
|
||||
|
||||
The SegFormer model was proposed in [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping
|
||||
Luo. The model consists of a hierarchical Transformer encoder and a lightweight all-MLP decode head to achieve great
|
||||
results on image segmentation benchmarks such as ADE20K and Cityscapes.
|
||||
[SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://huggingface.co/papers/2105.15203) is a semantic segmentation model that combines a hierarchical Transformer encoder (Mix Transformer, MiT) with a lightweight all-MLP decoder. It avoids positional encodings and complex decoders and achieves state-of-the-art performance on benchmarks like ADE20K and Cityscapes. This simple and lightweight design is more efficient and scalable.
|
||||
|
||||
The abstract from the paper is the following:
|
||||
|
||||
*We present SegFormer, a simple, efficient yet powerful semantic segmentation framework which unifies Transformers with
|
||||
lightweight multilayer perception (MLP) decoders. SegFormer has two appealing features: 1) SegFormer comprises a novel
|
||||
hierarchically structured Transformer encoder which outputs multiscale features. It does not need positional encoding,
|
||||
thereby avoiding the interpolation of positional codes which leads to decreased performance when the testing resolution
|
||||
differs from training. 2) SegFormer avoids complex decoders. The proposed MLP decoder aggregates information from
|
||||
different layers, and thus combining both local attention and global attention to render powerful representations. We
|
||||
show that this simple and lightweight design is the key to efficient segmentation on Transformers. We scale our
|
||||
approach up to obtain a series of models from SegFormer-B0 to SegFormer-B5, reaching significantly better performance
|
||||
and efficiency than previous counterparts. For example, SegFormer-B4 achieves 50.3% mIoU on ADE20K with 64M parameters,
|
||||
being 5x smaller and 2.2% better than the previous best method. Our best model, SegFormer-B5, achieves 84.0% mIoU on
|
||||
Cityscapes validation set and shows excellent zero-shot robustness on Cityscapes-C.*
|
||||
|
||||
The figure below illustrates the architecture of SegFormer. Taken from the [original paper](https://huggingface.co/papers/2105.15203).
|
||||
The figure below illustrates the architecture of SegFormer.
|
||||
|
||||
<img width="600" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/segformer_architecture.png"/>
|
||||
|
||||
This model was contributed by [nielsr](https://huggingface.co/nielsr). The original code can be found [here](https://github.com/NVlabs/SegFormer).
|
||||
You can find all the original SegFormer checkpoints under the [NVIDIA](https://huggingface.co/nvidia/models?search=segformer) organization.
|
||||
|
||||
## Usage tips
|
||||
> [!TIP]
|
||||
> This model was contributed by [nielsr](https://huggingface.co/nielsr).
|
||||
>
|
||||
> Click on the SegFormer models in the right sidebar for more examples of how to apply SegFormer to different vision tasks.
|
||||
|
||||
- SegFormer consists of a hierarchical Transformer encoder, and a lightweight all-MLP decoder head.
|
||||
[`SegformerModel`] is the hierarchical Transformer encoder (which in the paper is also referred to
|
||||
as Mix Transformer or MiT). [`SegformerForSemanticSegmentation`] adds the all-MLP decoder head on
|
||||
top to perform semantic segmentation of images. In addition, there's
|
||||
[`SegformerForImageClassification`] which can be used to - you guessed it - classify images. The
|
||||
authors of SegFormer first pre-trained the Transformer encoder on ImageNet-1k to classify images. Next, they throw
|
||||
away the classification head, and replace it by the all-MLP decode head. Next, they fine-tune the model altogether on
|
||||
ADE20K, Cityscapes and COCO-stuff, which are important benchmarks for semantic segmentation. All checkpoints can be
|
||||
found on the [hub](https://huggingface.co/models?other=segformer).
|
||||
- The quickest way to get started with SegFormer is by checking the [example notebooks](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/SegFormer) (which showcase both inference and
|
||||
fine-tuning on custom data). One can also check out the [blog post](https://huggingface.co/blog/fine-tune-segformer) introducing SegFormer and illustrating how it can be fine-tuned on custom data.
|
||||
- One can also check out [this interactive demo on Hugging Face Spaces](https://huggingface.co/spaces/chansung/segformer-tf-transformers)
|
||||
to try out a SegFormer model on custom images.
|
||||
- SegFormer works on any input size, as it pads the input to be divisible by `config.patch_sizes`.
|
||||
- One can use [`SegformerImageProcessor`] to prepare images and corresponding segmentation maps
|
||||
for the model. Note that this image processor is fairly basic and does not include all data augmentations used in
|
||||
the original paper. The original preprocessing pipelines (for the ADE20k dataset for instance) can be found [here](https://github.com/NVlabs/SegFormer/blob/master/local_configs/_base_/datasets/ade20k_repeat.py). The most
|
||||
important preprocessing step is that images and segmentation maps are randomly cropped and padded to the same size,
|
||||
such as 512x512 or 640x640, after which they are normalized.
|
||||
- One additional thing to keep in mind is that one can initialize [`SegformerImageProcessor`] with
|
||||
`do_reduce_labels` set to `True` or `False`. In some datasets (like ADE20k), the 0 index is used in the annotated
|
||||
segmentation maps for background. However, ADE20k doesn't include the "background" class in its 150 labels.
|
||||
Therefore, `do_reduce_labels` is used to reduce all labels by 1, and to make sure no loss is computed for the
|
||||
background class (i.e. it replaces 0 in the annotated maps by 255, which is the *ignore_index* of the loss function
|
||||
used by [`SegformerForSemanticSegmentation`]). However, other datasets use the 0 index as
|
||||
background class and include this class as part of all labels. In that case, `do_reduce_labels` should be set to
|
||||
`False`, as loss should also be computed for the background class.
|
||||
- As most models, SegFormer comes in different sizes, the details of which can be found in the table below
|
||||
(taken from Table 7 of the [original paper](https://huggingface.co/papers/2105.15203)).
|
||||
The example below demonstrates semantic segmentation with [`Pipeline`] or the [`AutoModel`] class.
|
||||
|
||||
| **Model variant** | **Depths** | **Hidden sizes** | **Decoder hidden size** | **Params (M)** | **ImageNet-1k Top 1** |
|
||||
| :---------------: | ------------- | ------------------- | :---------------------: | :------------: | :-------------------: |
|
||||
| MiT-b0 | [2, 2, 2, 2] | [32, 64, 160, 256] | 256 | 3.7 | 70.5 |
|
||||
| MiT-b1 | [2, 2, 2, 2] | [64, 128, 320, 512] | 256 | 14.0 | 78.7 |
|
||||
| MiT-b2 | [3, 4, 6, 3] | [64, 128, 320, 512] | 768 | 25.4 | 81.6 |
|
||||
| MiT-b3 | [3, 4, 18, 3] | [64, 128, 320, 512] | 768 | 45.2 | 83.1 |
|
||||
| MiT-b4 | [3, 8, 27, 3] | [64, 128, 320, 512] | 768 | 62.6 | 83.6 |
|
||||
| MiT-b5 | [3, 6, 40, 3] | [64, 128, 320, 512] | 768 | 82.0 | 83.8 |
|
||||
<hfoptions id="usage">
|
||||
<hfoption id="Pipeline">
|
||||
|
||||
Note that MiT in the above table refers to the Mix Transformer encoder backbone introduced in SegFormer. For
|
||||
SegFormer's results on the segmentation datasets like ADE20k, refer to the [paper](https://huggingface.co/papers/2105.15203).
|
||||
```python
|
||||
import torch
|
||||
from transformers import pipeline
|
||||
|
||||
pipeline = pipeline(task="image-segmentation", model="nvidia/segformer-b0-finetuned-ade-512-512", torch_dtype=torch.float16)
|
||||
pipeline("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="AutoModel">
|
||||
|
||||
```python
|
||||
import requests
|
||||
from PIL import Image
|
||||
from transformers import AutoProcessor, AutoModelForSemanticSegmentation
|
||||
|
||||
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
|
||||
image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
processor = AutoProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
|
||||
model = AutoModelForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
|
||||
|
||||
inputs = processor(images=image, return_tensors="pt")
|
||||
outputs = model(**inputs)
|
||||
logits = outputs.logits # shape [batch, num_labels, height, width]
|
||||
```
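The logits come out at a reduced resolution, so a typical follow-up (sketched below with the same checkpoint and image as above) upsamples them back to the input size with the image processor's `post_process_semantic_segmentation` helper:

```python
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForSemanticSegmentation

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
image = Image.open(requests.get(url, stream=True).raw)

processor = AutoProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = AutoModelForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Upsample the logits to the original image size and take the per-pixel argmax.
segmentation = processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
print(segmentation.shape)     # (height, width) map of ADE20K class ids
print(segmentation.unique())  # the classes predicted in this image
```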
|
||||
|
||||
</hfoption>
|
||||
|
||||
</hfoptions>
|
||||
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
- SegFormer works with **any input size**, padding inputs to be divisible by `config.patch_sizes`.
|
||||
- The most important preprocessing step is to randomly crop and pad all images to the same size (such as 512x512 or 640x640) and normalize afterwards.
|
||||
- Some datasets (ADE20k) use the `0` index in the annotated segmentation maps as the background, but don't include the "background" class in their labels. The `do_reduce_labels` argument in [`SegformerImageProcessor`] is used to reduce all labels by `1`. To make sure no loss is computed for the background class, it replaces `0` in the annotated maps by `255`, which is the `ignore_index` of the loss function.
|
||||
|
||||
Other datasets may include a background class and label though, in which case, `do_reduce_labels` should be `False`.
|
||||
|
||||
```python
|
||||
from transformers import SegformerImageProcessor
|
||||
processor = SegformerImageProcessor(do_reduce_labels=True)
|
||||
```
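A tiny, self-contained sketch (with a made-up two-class annotation) shows the effect of `do_reduce_labels` on the label values:

```python
import numpy as np
from PIL import Image
from transformers import SegformerImageProcessor

processor = SegformerImageProcessor(do_reduce_labels=True)

# Dummy 2x2 annotation: 0 is background, 1 and 2 are real classes.
annotation = Image.fromarray(np.array([[0, 1], [2, 1]], dtype=np.uint8))
image = Image.fromarray(np.zeros((2, 2, 3), dtype=np.uint8))

encoded = processor(images=image, segmentation_maps=annotation, return_tensors="pt")
print(encoded["labels"].unique())  # tensor([0, 1, 255]): 0 -> 255 (ignored), 1 -> 0, 2 -> 1
```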
|
||||
|
||||
## Resources
|
||||
|
||||
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with SegFormer.
|
||||
|
||||
<PipelineTag pipeline="image-classification"/>
|
||||
|
||||
- [`SegformerForImageClassification`] is supported by this [example script](https://github.com/huggingface/transformers/tree/main/examples/pytorch/image-classification) and [notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification.ipynb).
|
||||
- [Image classification task guide](../tasks/image_classification)
|
||||
|
||||
Semantic segmentation:
|
||||
|
||||
- [`SegformerForSemanticSegmentation`] is supported by this [example script](https://github.com/huggingface/transformers/tree/main/examples/pytorch/semantic-segmentation).
|
||||
- A blog on fine-tuning SegFormer on a custom dataset can be found [here](https://huggingface.co/blog/fine-tune-segformer).
|
||||
- More demo notebooks on SegFormer (both inference + fine-tuning on a custom dataset) can be found [here](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/SegFormer).
|
||||
- [Semantic segmentation task guide](../tasks/semantic_segmentation)
|
||||
|
||||
If you're interested in submitting a resource to be included here, please feel free to open a Pull Request and we'll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.
|
||||
- [Original SegFormer code (NVlabs)](https://github.com/NVlabs/SegFormer)
|
||||
- [Fine-tuning blog post](https://huggingface.co/blog/fine-tune-segformer)
|
||||
- [Tutorial notebooks (Niels Rogge)](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/SegFormer)
|
||||
- [Hugging Face demo space](https://huggingface.co/spaces/chansung/segformer-tf-transformers)
|
||||
|
||||
## SegformerConfig
|
||||
|
||||
|
@@ -68,7 +68,7 @@ processor = AutoImageProcessor.from_pretrained("magic-leap-community/superglue_o
 model = AutoModel.from_pretrained("magic-leap-community/superglue_outdoor")

 inputs = processor(images, return_tensors="pt")
-with torch.no_grad():
+with torch.inference_mode():
     outputs = model(**inputs)

 # Post-process to get keypoints and matches
@@ -95,7 +95,8 @@ processed_outputs = processor.post_process_keypoint_matching(outputs, image_size
 # SuperGlue requires pairs of images
 images = [image1, image2]
 inputs = processor(images, return_tensors="pt")
-outputs = model(**inputs)
+with torch.inference_mode():
+    outputs = model(**inputs)

 # Extract matching information
 keypoints0 = outputs.keypoints0  # Keypoints in first image
@@ -21,10 +21,10 @@ Model users still import and use the single-file interface they've grown familia
 A linter "unravels" the modular file into a `modeling.py` file to preserve the single model, single file directory structure (modeling, processor, etc.). Inheritance is flattened to only a **single** level.

-Run the command below to automatically generate a `modeling.py` file from a modular file.
+Run the command below to automatically generate a `modeling.py` file from a modular file (assuming the lowercase snake_case name of the model you want to convert is `your_model`).

 ```bash
-python utils/modular_model_converter.py --files-to-parse src/transformers/models/<your_model>/modular_<your_model>.py
+python utils/modular_model_converter.py your_model
 ```

 For example:
@@ -35,12 +35,6 @@ For example:
 You should be able to write everything (tokenizer, image processor, model, config, etc.) in a modular and their corresponding single-files are generated.

-Run the command below to ensure the generated content matches `modular_<your_model>.py`.
-
-```bash
-python utils/check_modular_conversion.py --files src/transformers/models/<your_model>/modular_<your_model>.py
-```

 The example below demonstrates how a model can be added with significantly fewer lines of code with Modular Transformers.

 ### BERT and RoBERTa
@@ -412,17 +406,17 @@ class MyNewDummyModel(DummyModel):
         del self.attribute
 ```

-## Explicit super() calls
+## Calling parent methods without unravelling their definition

-If you still want to inherit from `DummyModel` but don't want to remove the `self.attribute`, be explicit about which class' `super()` you're calling. The example below shows how to call the `super()` of `nn.Module` (unraveled code shown on the right).
+If you want to inherit from a module `DummyModule` and want to call `super()` WITHOUT unravelling the parent's code (that is, you want to call `super()` on the *generated* class parent), be explicit about which class' `super()` you're calling. The example below shows how to call the `super()` of `nn.Module` (unraveled code shown on the right). In this example, as `DummyModule` is itself an `nn.Module`, it makes sense to call `nn.Module.__init__(self)` directly, since that was the original intention. It is then unravelled to `super()` in `MyNewDummyModule` to follow Python best practices.

 ```py
-class MyNewDummyModel(DummyModel, nn.Module):      |     class MyNewDummyModel(nn.Module):
-                                                   |
-    def __init__(self, config: MyNewDummyConfig):  |         def __init__(self, config: MyNewDummyConfig):
-        nn.Module.__init__(config)                 |             super().__init__()
-        self.foo = config.foo                      |             self.foo = config.foo
-        ...                                        |             ...
+class MyNewDummyModule(DummyModule):               |     class MyNewDummyModule(nn.Module):
+                                                   |
+    def __init__(self):                            |         def __init__(self):
+        nn.Module.__init__(self)                   |             super().__init__()
+        self.foo = config.foo                      |             self.foo = config.foo
+        ...                                        |             ...
 ```
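Outside of the modular converter, the underlying Python mechanism is ordinary explicit parent initialization. A minimal, self-contained sketch (plain PyTorch, nothing Transformers-specific) of what skipping a parent's `__init__` does:

```py
import torch.nn as nn

class Base(nn.Module):
    def __init__(self):
        super().__init__()
        self.attribute = "only created by Base"

class Child(Base):
    def __init__(self):
        # Skip Base.__init__ entirely and initialize nn.Module directly,
        # so Child instances never get `self.attribute`.
        nn.Module.__init__(self)

print(hasattr(Child(), "attribute"))  # False
```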
## Deleting unused methods
@@ -24,23 +24,23 @@ Use the Space below to help you pick a quantization method depending on your har
 | Quantization Method | On the fly quantization | CPU | CUDA GPU | ROCm GPU | Metal (Apple Silicon) | Intel GPU | Torch compile() | Bits | PEFT Fine Tuning | Serializable with 🤗Transformers | 🤗Transformers Support | Link to library |
 |---------------------|--------------------------|-----|----------|----------|-----------------------|-----------|-----------------|------|------------------|----------------------------------|------------------------|-----------------|
-| [AQLM](./aqlm) | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 1/2 | 🟢 | 🟢 | 🟢 | https://github.com/Vahe1994/AQLM |
+| [AQLM](./aqlm) | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 🟢 | 🟢 | 1/2 | 🟢 | 🟢 | 🟢 | https://github.com/Vahe1994/AQLM |
 | [AutoRound](./auto_round) | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 🟢 | 🔴 | 2/3/4/8 | 🔴 | 🟢 | 🟢 | https://github.com/intel/auto-round |
 | [AWQ](./awq) | 🔴 | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | ? | 4 | 🟢 | 🟢 | 🟢 | https://github.com/casper-hansen/AutoAWQ |
 | [bitsandbytes](./bitsandbytes) | 🟢 | 🟡 | 🟢 | 🟡 | 🔴 | 🟡 | 🟢 | 4/8 | 🟢 | 🟢 | 🟢 | https://github.com/bitsandbytes-foundation/bitsandbytes |
 | [compressed-tensors](./compressed_tensors) | 🔴 | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🔴 | 1/8 | 🟢 | 🟢 | 🟢 | https://github.com/neuralmagic/compressed-tensors |
 | [EETQ](./eetq) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | ? | 8 | 🟢 | 🟢 | 🟢 | https://github.com/NetEase-FuXi/EETQ |
 | [FP-Quant](./fp_quant) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 4 | 🔴 | 🟢 | 🟢 | https://github.com/IST-DASLab/FP-Quant |
-| [GGUF / GGML (llama.cpp)](../gguf) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 1/8 | 🔴 | [See Notes](../gguf) | [See Notes](../gguf) | https://github.com/ggerganov/llama.cpp |
+| [GGUF / GGML (llama.cpp)](../gguf) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🟢 | 🔴 | 1/8 | 🔴 | [See Notes](../gguf) | [See Notes](../gguf) | https://github.com/ggerganov/llama.cpp |
 | [GPTQModel](./gptq) | 🔴 | 🟢 | 🟢 | 🟢 | 🟢 | 🟢 | 🔴 | 2/3/4/8 | 🟢 | 🟢 | 🟢 | https://github.com/ModelCloud/GPTQModel |
 | [AutoGPTQ](./gptq) | 🔴 | 🔴 | 🟢 | 🟢 | 🔴 | 🔴 | 🔴 | 2/3/4/8 | 🟢 | 🟢 | 🟢 | https://github.com/AutoGPTQ/AutoGPTQ |
 | [HIGGS](./higgs) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 2/4 | 🔴 | 🟢 | 🟢 | https://github.com/HanGuo97/flute |
-| [HQQ](./hqq) | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 1/8 | 🟢 | 🔴 | 🟢 | https://github.com/mobiusml/hqq/ |
+| [HQQ](./hqq) | 🟢 | 🟢 | 🟢 | 🔴 | 🔴 | 🟢 | 🟢 | 1/8 | 🟢 | 🔴 | 🟢 | https://github.com/mobiusml/hqq/ |
-| [optimum-quanto](./quanto) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 | 🟢 | 2/4/8 | 🔴 | 🔴 | 🟢 | https://github.com/huggingface/optimum-quanto |
+| [optimum-quanto](./quanto) | 🟢 | 🟢 | 🟢 | 🔴 | 🟢 | 🟢 | 🟢 | 2/4/8 | 🔴 | 🔴 | 🟢 | https://github.com/huggingface/optimum-quanto |
 | [FBGEMM_FP8](./fbgemm_fp8) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🔴 | 8 | 🔴 | 🟢 | 🟢 | https://github.com/pytorch/FBGEMM |
-| [torchao](./torchao) | 🟢 | 🟢 | 🟢 | 🔴 | 🟡 | 🔴 | | 4/8 | | 🟢🔴 | 🟢 | https://github.com/pytorch/ao |
+| [torchao](./torchao) | 🟢 | 🟢 | 🟢 | 🔴 | 🟡 | 🟢 | | 4/8 | | 🟢🔴 | 🟢 | https://github.com/pytorch/ao |
 | [VPTQ](./vptq) | 🔴 | 🔴 | 🟢 | 🟡 | 🔴 | 🔴 | 🟢 | 1/8 | 🔴 | 🟢 | 🟢 | https://github.com/microsoft/VPTQ |
-| [FINEGRAINED_FP8](./finegrained_fp8) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🔴 | 8 | 🔴 | 🟢 | 🟢 | |
+| [FINEGRAINED_FP8](./finegrained_fp8) | 🟢 | 🔴 | 🟢 | 🔴 | 🔴 | 🟢 | 🔴 | 8 | 🔴 | 🟢 | 🟢 | |
 | [SpQR](./spqr) | 🔴 | 🔴 | 🟢 | 🔴 | 🔴 | 🔴 | 🟢 | 3 | 🔴 | 🟢 | 🟢 | https://github.com/Vahe1994/SpQR/ |
 | [Quark](./quark) | 🔴 | 🟢 | 🟢 | 🟢 | 🟢 | 🟢 | ? | 2/4/6/8/9/16 | 🔴 | 🔴 | 🟢 | https://quark.docs.amd.com/latest/ |
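For any of the rows above, the entry point on the Transformers side is a quantization config passed to `from_pretrained`. A minimal sketch of an on-the-fly 4-bit load with bitsandbytes, assuming a CUDA GPU and `pip install bitsandbytes` (the checkpoint is only an example):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    quantization_config=quantization_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
```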
||||
|
@@ -107,7 +107,7 @@ model_id = "meta-llama/Llama-2-7b-chat-hf"
 model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
 tokenizer = AutoTokenizer.from_pretrained(model_id)

-past_key_values = DynamicCache()
+past_key_values = DynamicCache(config=model.config)
 messages = [{"role": "user", "content": "Hello, what's your name."}]
 inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
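To show how the freshly constructed cache is then consumed, here is a short, hedged continuation of the snippet above (the `max_new_tokens` value and the decode step are illustrative and not part of the diff):

```python
outputs = model.generate(**inputs, past_key_values=past_key_values, max_new_tokens=32)
# Strip the prompt tokens before decoding so only the newly generated text is printed.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```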
||||
|
@@ -29,7 +29,7 @@ BioGPT는 Renqian Luo, Liai Sun, Yingce Xia, Tao Qin, Sheng Zhang, Hoifung Poon,
 ## Usage tips [[usage-tips]]

 - BioGPT uses absolute position embeddings, so it is recommended to pad inputs on the right rather than the left.
-- Because BioGPT was trained with a causal language modeling (Casual Langague Modeling, CLM) objective, it is strong at predicting the next token. BioGPT can leverage this capability to generate syntactically coherent text, as can be seen in the example script `run_generation.py`.
+- Because BioGPT was trained with a causal language modeling (Causal Langague Modeling, CLM) objective, it is strong at predicting the next token. BioGPT can leverage this capability to generate syntactically coherent text, as can be seen in the example script `run_generation.py`.
 - The model can take `past_key_values` (for PyTorch) as input, which are previously computed key/value attention pairs. Using them avoids recomputing values that were already computed during text generation. In PyTorch, the `past_key_values` argument is explained in detail in the `BioGptForCausalLM.forward()` method.

 ### Using Scaled Dot Product Attention (SDPA) [[using-scaled-dot-product-attention-sdpa]]
|
@ -60,7 +60,7 @@ from transformers.utils import check_min_version, send_example_telemetry
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
Array = Any
|
||||
Dataset = datasets.arrow_dataset.Dataset
|
||||
|
@ -59,7 +59,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/flax/speech-recognition/requirements.txt")
|
||||
|
||||
|
@ -55,7 +55,7 @@ from transformers.utils import check_min_version, send_example_telemetry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
Array = Any
|
||||
Dataset = datasets.arrow_dataset.Dataset
|
||||
|
@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
|
||||
|
||||
|
@ -541,7 +541,7 @@ class DummyBertEncoder(nn.Module):
|
||||
use_cache = False
|
||||
|
||||
if use_cache and self.config.is_decoder and past_key_values is None:
|
||||
past_key_values = EncoderDecoderCache(DynamicCache(), DynamicCache())
|
||||
past_key_values = EncoderDecoderCache(DynamicCache(config=self.config), DynamicCache(config=self.config))
|
||||
|
||||
if use_cache and self.config.is_decoder and isinstance(past_key_values, tuple):
|
||||
logger.warning_once(
|
||||
|
@ -544,7 +544,7 @@ class RobertaEncoder(nn.Module):
|
||||
use_cache = False
|
||||
|
||||
if use_cache and self.config.is_decoder and past_key_values is None:
|
||||
past_key_values = EncoderDecoderCache(DynamicCache(), DynamicCache())
|
||||
past_key_values = EncoderDecoderCache(DynamicCache(config=self.config), DynamicCache(config=self.config))
|
||||
|
||||
if use_cache and self.config.is_decoder and isinstance(past_key_values, tuple):
|
||||
logger.warning_once(
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets[audio]>=1.14.0",
|
||||
# "evaluate",
|
||||
# "librosa",
|
||||
@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate>=0.12.0",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate>=0.12.0",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
@ -51,7 +51,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
@ -56,7 +56,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "torch>=1.5.0",
|
||||
# "torchvision>=0.6.0",
|
||||
# "datasets>=1.8.0",
|
||||
@ -61,7 +61,7 @@ Any model supported by the AutoModelForMaskedImageModeling API can be used.
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-pretraining/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets",
|
||||
@ -57,7 +57,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets",
|
||||
@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/instance-segmentation/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -69,7 +69,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -72,7 +72,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -74,7 +74,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -68,7 +68,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "torch >= 1.3",
|
||||
@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
@ -57,7 +57,7 @@ from transformers.utils import check_min_version, send_example_telemetry
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
@ -65,7 +65,7 @@ from transformers.utils import check_min_version, send_example_telemetry
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
# You should update this to your particular problem to have better documentation of `model_type`
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets>=4.0",
|
||||
@ -59,7 +59,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/object-detection/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "albumentations >= 1.4.16",
|
||||
# "timm",
|
||||
# "datasets>=4.0",
|
||||
@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = get_logger(__name__)
|
||||
|
@ -49,7 +49,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
|
||||
|
||||
|
@ -47,7 +47,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
|
||||
|
||||
|
@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
|
||||
|
||||
|
@ -56,7 +56,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
|
||||
|
||||
|
@ -45,7 +45,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets >= 2.0.0",
|
||||
# "torch >= 1.3",
|
||||
# "accelerate",
|
||||
@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/semantic-segmentation/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets >= 2.0.0",
|
||||
# "torch >= 1.3",
|
||||
# "accelerate",
|
||||
@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets[audio] >= 1.12.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets[audio] >= 1.18.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets[audio] >= 1.18.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
@ -64,7 +64,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "datasets[audio] >= 1.18.0",
|
||||
# "torch >= 1.5",
|
||||
# "torchaudio",
|
||||
@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -67,7 +67,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -61,7 +61,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -63,7 +63,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -62,7 +62,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.21.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
# "protobuf",
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "seqeval",
|
||||
# "datasets >= 1.8.0",
|
||||
@ -60,7 +60,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "seqeval",
|
||||
# "datasets >= 1.8.0",
|
||||
@ -67,7 +67,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -66,7 +66,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
# /// script
|
||||
# dependencies = [
|
||||
# "transformers @ git+https://github.com/huggingface/transformers.git",
|
||||
# "transformers==4.56.2",
|
||||
# "accelerate >= 0.12.0",
|
||||
# "datasets >= 1.8.0",
|
||||
# "sentencepiece != 0.1.92",
|
||||
@ -71,7 +71,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = get_logger(__name__)
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
|
||||
|
@ -50,7 +50,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version(
|
||||
"datasets>=1.8.0", "To fix: pip install -r examples/tensorflow/contrastive-image-text/requirements.txt"
|
||||
|
@ -54,7 +54,7 @@ from transformers.utils.versions import require_version
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
|
||||
|
||||
|
@ -49,7 +49,7 @@ from transformers.utils import check_min_version, send_example_telemetry
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -61,7 +61,7 @@ except (ModuleNotFoundError, ImportError):
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -52,7 +52,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
# region Checking dependencies
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
|
||||
|
||||
|
@ -46,7 +46,7 @@ from transformers.utils import check_min_version, send_example_telemetry
|
||||
|
||||
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
task_to_keys = {
|
||||
"cola": ("sentence", None),
|
||||
|
@ -55,7 +55,7 @@ from transformers.utils.versions import require_version
|
||||
|
||||
# region Dependencies and constants
|
||||
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
|
||||
check_min_version("4.56.0.dev0")
|
||||
check_min_version("4.56.0")
|
||||
|
||||
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
|
||||
|
||||
|
setup.py
@ -189,7 +189,7 @@ _deps = [
|
||||
"timeout-decorator",
|
||||
"tiktoken",
|
||||
"timm<=1.0.19,!=1.0.18",
|
||||
"tokenizers>=0.21,<0.22",
|
||||
"tokenizers>=0.22.0,<=0.23.0",
|
||||
"torch>=2.2",
|
||||
"torchaudio",
|
||||
"torchvision",
|
||||
@ -463,7 +463,7 @@ install_requires = [
|
||||
|
||||
setup(
|
||||
name="transformers",
|
||||
version="4.56.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
version="4.56.2", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
||||
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
|
||||
author_email="transformers@huggingface.co",
|
||||
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
|
||||
|
@ -18,7 +18,7 @@
|
||||
# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
|
||||
# in the namespace without actually importing anything (and especially none of the backends).
|
||||
|
||||
__version__ = "4.56.0.dev0"
|
||||
__version__ = "4.56.2"
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
@ -19,6 +19,7 @@ import torch
|
||||
from torch import Tensor, nn
|
||||
|
||||
from .utils import logging
|
||||
from .utils.import_utils import is_torchdynamo_compiling
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
@ -185,6 +186,100 @@ class ClassInstantier(OrderedDict):
|
||||
return cls(**kwargs)
|
||||
|
||||
|
||||
class XIELUActivation(nn.Module):
|
||||
"""
|
||||
Applies the xIELU activation function introduced in https://arxiv.org/abs/2411.13010
|
||||
|
||||
If the user has installed the nickjbrowning/XIELU wheel, we import xIELU CUDA
|
||||
Otherwise, we emit a single warning and use xIELU Python
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
alpha_p_init=0.8,
|
||||
alpha_n_init=0.8,
|
||||
beta=0.5,
|
||||
eps=-1e-6,
|
||||
dtype=torch.bfloat16,
|
||||
with_vector_loads=False,
|
||||
):
|
||||
super().__init__()
|
||||
self.alpha_p = nn.Parameter(torch.log(torch.exp(torch.tensor(alpha_p_init, dtype=dtype)) - 1).unsqueeze(0))
|
||||
self.alpha_n = nn.Parameter(
|
||||
torch.log(torch.exp(torch.tensor(alpha_n_init - beta, dtype=dtype)) - 1).unsqueeze(0)
|
||||
)
|
||||
self.register_buffer("beta", torch.tensor(beta, dtype=dtype))
|
||||
self.register_buffer("eps", torch.tensor(eps, dtype=dtype))
|
||||
self.with_vector_loads = with_vector_loads
|
||||
# Temporary until xIELU CUDA fully implemented
|
||||
self._beta_scalar = float(self.beta.detach().cpu().float().item())
|
||||
self._eps_scalar = float(self.eps.detach().cpu().float().item())
|
||||
|
||||
self._xielu_cuda_obj = None
|
||||
try:
|
||||
import xielu.ops # noqa: F401
|
||||
|
||||
self._xielu_cuda_obj = torch.classes.xielu.XIELU()
|
||||
msg = "Using experimental xIELU CUDA."
|
||||
try:
|
||||
from torch._dynamo import allow_in_graph
|
||||
|
||||
self._xielu_cuda_fn = allow_in_graph(self._xielu_cuda)
|
||||
msg += " Enabled torch._dynamo for xIELU CUDA."
|
||||
except Exception as err:
|
||||
msg += f" Could not enable torch._dynamo for xIELU ({err}) - this may result in slower performance."
|
||||
self._xielu_cuda_fn = self._xielu_cuda
|
||||
logger.warning_once(msg)
|
||||
except Exception as err:
|
||||
logger.warning_once(
|
||||
"CUDA-fused xIELU not available (%s) – falling back to a Python version.\n"
|
||||
"For CUDA xIELU (experimental), `pip install git+https://github.com/nickjbrowning/XIELU`",
|
||||
str(err),
|
||||
)
|
||||
|
||||
def _xielu_python(self, x: Tensor) -> Tensor:
|
||||
alpha_p = nn.functional.softplus(self.alpha_p)
|
||||
alpha_n = self.beta + nn.functional.softplus(self.alpha_n)
|
||||
return torch.where(
|
||||
x > 0,
|
||||
alpha_p * x * x + self.beta * x,
|
||||
(torch.expm1(torch.min(x, self.eps)) - x) * alpha_n + self.beta * x,
|
||||
)
|
||||
|
||||
def _xielu_cuda(self, x: Tensor) -> Tensor:
|
||||
"""Firewall function to prevent torch.compile from seeing .item() calls"""
|
||||
original_shape = x.shape
|
||||
# CUDA kernel expects 3D tensors, reshape if needed
|
||||
while x.dim() < 3:
|
||||
x = x.unsqueeze(0)
|
||||
if x.dim() > 3:
|
||||
x = x.view(-1, 1, x.size(-1))
|
||||
if original_shape != x.shape:
|
||||
logger.warning_once(
|
||||
"Warning: xIELU input tensor expects 3 dimensions but got (shape: %s). Reshaping to (shape: %s).",
|
||||
original_shape,
|
||||
x.shape,
|
||||
)
|
||||
result = self._xielu_cuda_obj.forward(
|
||||
x,
|
||||
self.alpha_p,
|
||||
self.alpha_n,
|
||||
# Temporary until xIELU CUDA fully implemented -> self.{beta,eps}.item()
|
||||
self._beta_scalar,
|
||||
self._eps_scalar,
|
||||
self.with_vector_loads,
|
||||
)
|
||||
return result.view(original_shape)
|
||||
|
||||
def forward(self, input: Tensor) -> Tensor:
|
||||
if self._xielu_cuda_obj is not None and input.is_cuda:
|
||||
if not is_torchdynamo_compiling():
|
||||
return self._xielu_cuda_fn(input)
|
||||
else:
|
||||
logger.warning_once("torch._dynamo is compiling, using Python version of xIELU.")
|
||||
return self._xielu_python(input)
|
||||
|
||||
|
||||
ACT2CLS = {
|
||||
"gelu": GELUActivation,
|
||||
"gelu_10": (ClippedGELUActivation, {"min": -10, "max": 10}),
|
||||
@ -206,6 +301,7 @@ ACT2CLS = {
|
||||
"swish": nn.SiLU,
|
||||
"tanh": nn.Tanh,
|
||||
"prelu": nn.PReLU,
|
||||
"xielu": XIELUActivation,
|
||||
}
|
||||
ACT2FN = ClassInstantier(ACT2CLS)
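Since `"xielu"` is now an `ACT2CLS` key, the Python fallback defined above can be restated as a standalone reference for anyone who wants to sanity-check the formula (a sketch mirroring `_xielu_python`, with the raw parameters passed in explicitly):

```python
import torch
import torch.nn as nn

def xielu_reference(x, alpha_p_raw, alpha_n_raw, beta=0.5, eps=-1e-6):
    # softplus keeps both slopes positive, matching the parameterization above
    alpha_p = nn.functional.softplus(alpha_p_raw)
    alpha_n = beta + nn.functional.softplus(alpha_n_raw)
    eps_t = torch.tensor(eps, dtype=x.dtype)
    return torch.where(
        x > 0,
        alpha_p * x * x + beta * x,
        (torch.expm1(torch.min(x, eps_t)) - x) * alpha_n + beta * x,
    )

x = torch.linspace(-3, 3, 7)
print(xielu_reference(x, torch.tensor(0.5), torch.tensor(0.3)))
```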
|
||||
|
||||
|
@ -17,6 +17,7 @@ and remove unnecessary dependencies.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import importlib
|
||||
import io
|
||||
import os
|
||||
import warnings
|
||||
@ -25,6 +26,7 @@ from typing import Any, Optional, Sequence, Union
|
||||
|
||||
import numpy as np
|
||||
import requests
|
||||
from packaging import version
|
||||
|
||||
from .utils import (
|
||||
is_librosa_available,
|
||||
@ -46,8 +48,7 @@ if is_librosa_available():
|
||||
import soxr
|
||||
|
||||
if is_torchcodec_available():
|
||||
from torchcodec.decoders import AudioDecoder
|
||||
|
||||
TORCHCODEC_VERSION = version.parse(importlib.metadata.version("torchcodec"))
|
||||
|
||||
AudioInput = Union[np.ndarray, "torch.Tensor", Sequence[np.ndarray], Sequence["torch.Tensor"]] # noqa: F821
|
||||
|
||||
@ -71,8 +72,8 @@ def load_audio(audio: Union[str, np.ndarray], sampling_rate=16000, timeout=None)
|
||||
if isinstance(audio, str):
|
||||
# Try to load with `torchcodec` but do not enforce users to install it. If not found
|
||||
# fallback to `librosa`. If using an audio-only model, most probably `torchcodec` won't be
|
||||
# needed.
|
||||
if is_torchcodec_available():
|
||||
# needed. Do not raise any errors if not installed or versions do not match
|
||||
if is_torchcodec_available() and TORCHCODEC_VERSION >= version.parse("0.3.0"):
|
||||
audio = load_audio_torchcodec(audio, sampling_rate=sampling_rate)
|
||||
else:
|
||||
audio = load_audio_librosa(audio, sampling_rate=sampling_rate, timeout=timeout)
|
||||
@ -99,7 +100,9 @@ def load_audio_torchcodec(audio: Union[str, np.ndarray], sampling_rate=16000) ->
|
||||
Returns:
|
||||
`np.ndarray`: A numpy array representing the audio.
|
||||
"""
|
||||
requires_backends(load_audio, ["torchcodec"])
|
||||
# Lazy import so that issues in torchcodec compatibility don't crash the whole library
|
||||
requires_backends(load_audio_torchcodec, ["torchcodec"])
|
||||
from torchcodec.decoders import AudioDecoder
|
||||
|
||||
# Set `num_channels` to `1` which is what most models expects and the default in librosa
|
||||
decoder = AudioDecoder(audio, sample_rate=sampling_rate, num_channels=1)
|
||||
@ -123,7 +126,7 @@ def load_audio_librosa(audio: Union[str, np.ndarray], sampling_rate=16000, timeo
|
||||
Returns:
|
||||
`np.ndarray`: A numpy array representing the audio.
|
||||
"""
|
||||
requires_backends(load_audio, ["librosa"])
|
||||
requires_backends(load_audio_librosa, ["librosa"])
|
||||
|
||||
# Load audio from URL (e.g https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/translate_to_chinese.wav)
|
||||
if audio.startswith("http://") or audio.startswith("https://"):
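The net effect for callers of `load_audio` can be sketched as follows (the URL is the one referenced in the comment above; whichever backend is available is picked automatically, so this assumes either torchcodec >= 0.3.0 or librosa is installed):

```python
from transformers.audio_utils import load_audio

audio = load_audio(
    "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/translate_to_chinese.wav",
    sampling_rate=16000,
)
print(audio.shape, audio.dtype)  # a 1-D numpy array resampled to 16 kHz
```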
|
||||
|
@ -99,7 +99,7 @@ class DynamicLayer(CacheLayerMixin):
|
||||
cache_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Update the key and value caches in-place, and return the necessary kes and value states.
|
||||
Update the key and value caches in-place, and return the necessary keys and value states.
|
||||
|
||||
Args:
|
||||
key_states (`torch.Tensor`): The new key states to cache.
|
||||
@ -182,7 +182,7 @@ class DynamicSlidingWindowLayer(DynamicLayer):
|
||||
cache_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Update the key and value caches in-place, and return the necessary kes and value states.
|
||||
Update the key and value caches in-place, and return the necessary keys and value states.
|
||||
|
||||
Args:
|
||||
key_states (`torch.Tensor`): The new key states to cache.
|
||||
@ -303,7 +303,7 @@ class StaticLayer(CacheLayerMixin):
|
||||
cache_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Update the key and value caches in-place, and return the necessary kes and value states.
|
||||
Update the key and value caches in-place, and return the necessary keys and value states.
|
||||
|
||||
Args:
|
||||
key_states (`torch.Tensor`): The new key states to cache.
|
||||
@ -378,7 +378,7 @@ class SlidingWindowLayer(StaticLayer):
|
||||
cache_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Update the key and value caches in-place, and return the necessary kes and value states.
|
||||
Update the key and value caches in-place, and return the necessary keys and value states.
|
||||
|
||||
Args:
|
||||
key_states (`torch.Tensor`): The new key states to cache.
|
||||
@ -457,7 +457,7 @@ class ChunkedSlidingLayer(SlidingWindowLayer):
|
||||
cache_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Update the key and value caches in-place, and return the necessary kes and value states.
|
||||
Update the key and value caches in-place, and return the necessary keys and value states.
|
||||
|
||||
Args:
|
||||
key_states (`torch.Tensor`): The new key states to cache.
|
||||
@ -566,7 +566,7 @@ class QuantizedLayer(DynamicLayer):
|
||||
cache_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> tuple[torch.Tensor, torch.Tensor]:
|
||||
"""
|
||||
Update the key and value caches in-place, and return the necessary kes and value states.
|
||||
Update the key and value caches in-place, and return the necessary keys and value states.
|
||||
|
||||
Args:
|
||||
key_states (`torch.Tensor`): The new key states to cache.
|
||||
@ -996,7 +996,6 @@ class DynamicCache(Cache):
|
||||
>>> past_key_values = DynamicCache(config=model.config)
|
||||
>>> outputs = model(**inputs, past_key_values=past_key_values, use_cache=True)
|
||||
>>> outputs.past_key_values # access cache filled with key/values from generation
|
||||
DynamicCache()
|
||||
```
|
||||
"""
|
||||
|
||||
@ -1018,6 +1017,9 @@ class DynamicCache(Cache):
|
||||
"sliding_attention" if sliding_window is not None else "full_attention"
|
||||
for _ in range(config.num_hidden_layers)
|
||||
]
|
||||
# Some models have shared layers thus no cache is needed for them (e.g. Gemma3n)
|
||||
if hasattr(config, "num_kv_shared_layers"):
|
||||
layer_types = layer_types[: -config.num_kv_shared_layers]
|
||||
|
||||
for layer_type in layer_types:
|
||||
if layer_type in ("sliding_attention", "chunked_attention"):
|
||||
@ -1129,6 +1131,9 @@ class StaticCache(Cache):
|
||||
layer_types = ["chunked_attention" for _ in range(config.num_hidden_layers)]
|
||||
else:
|
||||
layer_types = ["full_attention" for _ in range(config.num_hidden_layers)]
|
||||
# Some models have shared layers thus no cache is needed for them (e.g. Gemma3n)
|
||||
if hasattr(config, "num_kv_shared_layers"):
|
||||
layer_types = layer_types[: -config.num_kv_shared_layers]
|
||||
|
||||
layers = []
|
||||
for layer_type in layer_types:
|
||||
@ -1223,8 +1228,8 @@ class EncoderDecoderCache(Cache):
|
||||
>>> inputs = processor(audio=YOUR-AUDIO, return_tensors="pt")
|
||||
|
||||
>>> # Prepare cache classes for encoder and decoder and pass it to model's forward
|
||||
>>> self_attention_cache = DynamicCache()
|
||||
>>> cross_attention_cache = DynamicCache()
|
||||
>>> self_attention_cache = DynamicCache(config=self.config)
|
||||
>>> cross_attention_cache = DynamicCache(config=self.config)
|
||||
>>> past_key_values = EncoderDecoderCache(self_attention_cache, cross_attention_cache)
|
||||
>>> outputs = model(**inputs, past_key_values=past_key_values, use_cache=True)
|
||||
>>> outputs.past_key_values # access cache filled with key/values from generation
|
||||
|
@ -129,7 +129,6 @@ class RichInterface:
|
||||
text = ""
|
||||
async for token in await stream:
|
||||
outputs = token.choices[0].delta.content
|
||||
request_id = token.id
|
||||
|
||||
if not outputs:
|
||||
continue
|
||||
@ -168,7 +167,7 @@ class RichInterface:
|
||||
|
||||
self._console.print()
|
||||
|
||||
return text, request_id
|
||||
return text
|
||||
|
||||
def input(self) -> str:
|
||||
"""Gets user input from the console."""
|
||||
@ -700,8 +699,6 @@ class ChatCommand(BaseTransformersCLICommand):
|
||||
interface.clear()
|
||||
chat = self.clear_chat_history(args.system_prompt)
|
||||
|
||||
request_id = None
|
||||
|
||||
# Starts the session with a minimal help message at the top, so that a user doesn't get stuck
|
||||
interface.print_help(minimal=True)
|
||||
while True:
|
||||
@ -733,13 +730,12 @@ class ChatCommand(BaseTransformersCLICommand):
|
||||
chat,
|
||||
stream=True,
|
||||
extra_body={
|
||||
"request_id": request_id,
|
||||
"generation_config": generation_config.to_json_string(),
|
||||
"model": model,
|
||||
},
|
||||
)
|
||||
|
||||
model_output, request_id = await interface.stream_output(stream)
|
||||
model_output = await interface.stream_output(stream)
|
||||
|
||||
chat.append({"role": "assistant", "content": model_output})
|
||||
|
||||
|
@ -1058,7 +1058,9 @@ class PretrainedConfig(PushToHubMixin):
|
||||
if d.get("dtype") is not None:
|
||||
if isinstance(d["dtype"], dict):
|
||||
d["dtype"] = {k: str(v).split(".")[-1] for k, v in d["dtype"].items()}
|
||||
elif not isinstance(d["dtype"], str):
|
||||
# models like Emu3 can have "dtype" as token in config's vocabulary map,
|
||||
# so we also exclude int type here to avoid error in this special case.
|
||||
elif not isinstance(d["dtype"], (str, int)):
|
||||
d["dtype"] = str(d["dtype"]).split(".")[1]
|
||||
for value in d.values():
|
||||
if isinstance(value, dict):
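For context on why the extra `int` check matters: the serialization trick is just string-splitting the torch dtype's repr, and an integer `dtype` entry (the Emu3-style vocabulary case mentioned above) must be left untouched. A small illustration:

```python
import torch

print(str(torch.bfloat16))                 # "torch.bfloat16"
print(str(torch.bfloat16).split(".")[-1])  # "bfloat16" -- what ends up in config.json

# A config whose vocabulary maps the literal token "dtype" to an id must keep the int as-is:
d = {"dtype": 17}
if not isinstance(d["dtype"], (str, int)):
    d["dtype"] = str(d["dtype"]).split(".")[-1]
print(d)  # {'dtype': 17}
```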
|
||||
|
@ -91,7 +91,7 @@ deps = {
|
||||
"timeout-decorator": "timeout-decorator",
|
||||
"tiktoken": "tiktoken",
|
||||
"timm": "timm<=1.0.19,!=1.0.18",
|
||||
"tokenizers": "tokenizers>=0.21,<0.22",
|
||||
"tokenizers": "tokenizers>=0.22.0,<=0.23.0",
|
||||
"torch": "torch>=2.2",
|
||||
"torchaudio": "torchaudio",
|
||||
"torchvision": "torchvision",
|
||||
|
@@ -27,9 +27,9 @@ import numpy as np
from .dynamic_module_utils import custom_object_save
from .utils import (
FEATURE_EXTRACTOR_NAME,
PROCESSOR_NAME,
PushToHubMixin,
TensorType,
cached_file,
copy_func,
download_url,
is_flax_available,
@@ -44,6 +44,7 @@ from .utils import (
logging,
requires_backends,
)
from .utils.hub import cached_file


if TYPE_CHECKING:
@@ -505,19 +506,28 @@ class FeatureExtractionMixin(PushToHubMixin):
feature_extractor_file = FEATURE_EXTRACTOR_NAME
try:
# Load from local folder or from cache or download from model Hub and cache
resolved_feature_extractor_file = cached_file(
pretrained_model_name_or_path,
feature_extractor_file,
cache_dir=cache_dir,
force_download=force_download,
proxies=proxies,
resume_download=resume_download,
local_files_only=local_files_only,
subfolder=subfolder,
token=token,
user_agent=user_agent,
revision=revision,
)
resolved_feature_extractor_files = [
resolved_file
for filename in [feature_extractor_file, PROCESSOR_NAME]
if (
resolved_file := cached_file(
pretrained_model_name_or_path,
filename=filename,
cache_dir=cache_dir,
force_download=force_download,
proxies=proxies,
resume_download=resume_download,
local_files_only=local_files_only,
subfolder=subfolder,
token=token,
user_agent=user_agent,
revision=revision,
_raise_exceptions_for_missing_entries=False,
)
)
is not None
]
resolved_feature_extractor_file = resolved_feature_extractor_files[0]
except OSError:
# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to
# the original exception.
@@ -536,6 +546,7 @@ class FeatureExtractionMixin(PushToHubMixin):
with open(resolved_feature_extractor_file, encoding="utf-8") as reader:
text = reader.read()
feature_extractor_dict = json.loads(text)
feature_extractor_dict = feature_extractor_dict.get("feature_extractor", feature_extractor_dict)

except json.JSONDecodeError:
raise OSError(
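The new lookup above tries the legacy feature extractor file first (the usual preprocessor_config.json) and falls back to the unified processor config (processor_config.json), keeping the first candidate that resolves. A small standalone sketch of that walrus-in-comprehension pattern, where `fake_cached_file` stands in for `cached_file(..., _raise_exceptions_for_missing_entries=False)` and the file sets are made up:

from typing import Optional

def fake_cached_file(repo_files: set[str], filename: str) -> Optional[str]:
    # returns a path when the file exists in the repo, None otherwise
    return f"/cache/{filename}" if filename in repo_files else None

def resolve_first(repo_files: set[str], candidates: list[str]) -> str:
    # keep every candidate that resolves, in priority order, then take the first one
    found = [path for name in candidates if (path := fake_cached_file(repo_files, name)) is not None]
    if not found:
        raise OSError(f"none of {candidates} found")
    return found[0]

# a repo that only ships the unified processor config still resolves
print(resolve_first({"processor_config.json"}, ["preprocessor_config.json", "processor_config.json"]))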
@@ -191,7 +191,7 @@ class RequestState:
f"query_length={len(self.prompt_ids)}",
f"remaining_tokens={len(self.remaining_prompt_ids)}",
f"kv_length={self.position_offset}",
f"full_prompt_lenght={len(self.full_prompt_ids)}",
f"full_prompt_length={len(self.full_prompt_ids)}",
f"allocated_blocks={self.allocated_blocks}",
f"generated_tokens={self.static_outputs}",
]
@@ -1998,7 +1998,7 @@ class GenerationMixin(ContinuousMixin):
elif "dynamic" in generation_config.cache_implementation:
model_kwargs[cache_name] = DynamicCache(**dynamic_cache_kwargs)

# Use DynamicCache() instance by default. This will avoid back and forth from legacy format that
# Use DynamicCache instance by default. This will avoid back and forth from legacy format that
# keeps copying the cache thus using much more memory
else:
model_kwargs[cache_name] = (
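For users, the practical effect of the hunk above is that requesting the dynamic cache through the generation config now builds it with model-aware kwargs. A hedged usage sketch (the tiny checkpoint is only a placeholder):

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")

inputs = tokenizer("Hello", return_tensors="pt")
# "dynamic" routes through the branch shown in the diff above
generation_config = GenerationConfig(max_new_tokens=5, cache_implementation="dynamic")
out = model.generate(**inputs, generation_config=generation_config)
print(out.shape)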
@@ -26,14 +26,15 @@ from .feature_extraction_utils import BatchFeature as BaseBatchFeature
from .image_utils import is_valid_image, load_image
from .utils import (
IMAGE_PROCESSOR_NAME,
PROCESSOR_NAME,
PushToHubMixin,
cached_file,
copy_func,
download_url,
is_offline_mode,
is_remote_url,
logging,
)
from .utils.hub import cached_file


ImageProcessorType = TypeVar("ImageProcessorType", bound="ImageProcessingMixin")
@@ -329,19 +330,28 @@ class ImageProcessingMixin(PushToHubMixin):
image_processor_file = image_processor_filename
try:
# Load from local folder or from cache or download from model Hub and cache
resolved_image_processor_file = cached_file(
pretrained_model_name_or_path,
image_processor_file,
cache_dir=cache_dir,
force_download=force_download,
proxies=proxies,
resume_download=resume_download,
local_files_only=local_files_only,
token=token,
user_agent=user_agent,
revision=revision,
subfolder=subfolder,
)
resolved_image_processor_files = [
resolved_file
for filename in [image_processor_file, PROCESSOR_NAME]
if (
resolved_file := cached_file(
pretrained_model_name_or_path,
filename=filename,
cache_dir=cache_dir,
force_download=force_download,
proxies=proxies,
resume_download=resume_download,
local_files_only=local_files_only,
token=token,
user_agent=user_agent,
revision=revision,
subfolder=subfolder,
_raise_exceptions_for_missing_entries=False,
)
)
is not None
]
resolved_image_processor_file = resolved_image_processor_files[0]
except OSError:
# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to
# the original exception.
@@ -360,6 +370,7 @@ class ImageProcessingMixin(PushToHubMixin):
with open(resolved_image_processor_file, encoding="utf-8") as reader:
text = reader.read()
image_processor_dict = json.loads(text)
image_processor_dict = image_processor_dict.get("image_processor", image_processor_dict)

except json.JSONDecodeError:
raise OSError(
@@ -854,7 +854,7 @@ class Seq2SeqLMDecoderExportableModuleWithStaticCache(torch.nn.Module):
head_dim = getattr(self.config, "head_dim", self.config.hidden_size // self.config.num_attention_heads)
num_heads = getattr(self.config, "num_key_value_heads", self.config.num_attention_heads)
self.static_cache.early_initialization(batch_size, num_heads, head_dim, torch.float32, model_device)
self.cache = EncoderDecoderCache(self.static_cache, DynamicCache())
self.cache = EncoderDecoderCache(self.static_cache, DynamicCache(config=self.config))

register_dynamic_cache_export_support()

@@ -1051,7 +1051,7 @@ def export_with_dynamic_cache(
{
"input_ids": example_input_ids,
"attention_mask": example_attention_mask,
"past_key_values": DynamicCache(),
"past_key_values": DynamicCache(config=model.config),
"use_cache": True,
},
strict=False,
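To make the second hunk concrete, an export call with the config-aware cache now looks roughly like the sketch below. This is only an illustration: the checkpoint is a placeholder, and a real trace additionally needs dynamic-cache export support registered beforehand, as the surrounding code does.

import torch
from transformers import AutoModelForCausalLM, DynamicCache

model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM").eval()
example_input_ids = torch.ones((1, 8), dtype=torch.long)
example_attention_mask = torch.ones((1, 8), dtype=torch.long)

exported = torch.export.export(
    model,
    args=(),
    kwargs={
        "input_ids": example_input_ids,
        "attention_mask": example_attention_mask,
        "past_key_values": DynamicCache(config=model.config),  # was DynamicCache() before this change
        "use_cache": True,
    },
    strict=False,
)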
@@ -11,7 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union
import re
from functools import partial
from typing import Optional, Union

from ..modeling_flash_attention_utils import lazy_import_flash_attention
from .flash_attention import flash_attention_forward


try:
@@ -19,12 +24,13 @@ try:
Device,
LayerRepository,
Mode,
get_kernel,
register_kernel_mapping,
replace_kernel_forward_from_hub,
use_kernel_forward_from_hub,
)

_hub_kernels_available = True
_kernels_available = True

_KERNEL_MAPPING: dict[str, dict[Union[Device, str], LayerRepository]] = {
"MultiScaleDeformableAttention": {
@@ -82,8 +88,9 @@ try:

register_kernel_mapping(_KERNEL_MAPPING)


except ImportError:
_kernels_available = False

# Stub to make decorators int transformers work when `kernels`
# is not installed.
def use_kernel_forward_from_hub(*args, **kwargs):
@@ -104,16 +111,66 @@ except ImportError:
def register_kernel_mapping(*args, **kwargs):
raise RuntimeError("register_kernel_mapping requires `kernels` to be installed. Run `pip install kernels`.")

_hub_kernels_available = False

def is_kernel(attn_implementation: Optional[str]) -> bool:
"""Check whether `attn_implementation` matches a kernel pattern from the hub."""
return (
attn_implementation is not None
and re.search(r"^[^/:]+/[^/:]+(?:@[^/:]+)?(?::[^/:]+)?$", attn_implementation) is not None
)


def is_hub_kernels_available():
return _hub_kernels_available
def load_and_register_kernel(attn_implementation: str) -> None:
"""Load and register the kernel associated to `attn_implementation`."""
if not is_kernel(attn_implementation):
return
if not _kernels_available:
raise ImportError("`kernels` is not installed. Please install it with `pip install kernels`.")

# Need to be imported here as otherwise we have a circular import in `modeling_utils`
from ..masking_utils import ALL_MASK_ATTENTION_FUNCTIONS
from ..modeling_utils import ALL_ATTENTION_FUNCTIONS

attention_wrapper = None
# FIXME: @ArthurZucker this is dirty, did not want to do a lof of extra work
actual_attn_name = attn_implementation
if "|" in attn_implementation:
attention_wrapper, actual_attn_name = attn_implementation.split("|")
# `transformers` has wrapper for sdpa, paged, flash, flex etc.
attention_wrapper = ALL_ATTENTION_FUNCTIONS.get(attention_wrapper)
# Extract repo_id and kernel_name from the string
if ":" in actual_attn_name:
repo_id, kernel_name = actual_attn_name.split(":")
kernel_name = kernel_name.strip()
else:
repo_id = actual_attn_name
kernel_name = None
repo_id = repo_id.strip()
# extract the rev after the @ if it exists
repo_id, _, rev = repo_id.partition("@")
repo_id = repo_id.strip()
rev = rev.strip() if rev else None

# Load the kernel from hub
try:
kernel = get_kernel(repo_id, revision=rev)
except Exception as e:
raise ValueError(f"An error occured while trying to load from '{repo_id}': {e}.")
# correctly wrap the kernel
if hasattr(kernel, "flash_attn_varlen_func"):
if attention_wrapper is None:
attention_wrapper = flash_attention_forward
kernel_function = partial(attention_wrapper, implementation=kernel)
lazy_import_flash_attention(kernel)
elif kernel_name is not None:
kernel_function = getattr(kernel, kernel_name)
# Register the kernel as a valid attention
ALL_ATTENTION_FUNCTIONS.register(attn_implementation, kernel_function)
ALL_MASK_ATTENTION_FUNCTIONS.register(attn_implementation, ALL_MASK_ATTENTION_FUNCTIONS["flash_attention_2"])


__all__ = [
"LayerRepository",
"is_hub_kernels_available",
"use_kernel_forward_from_hub",
"register_kernel_mapping",
"replace_kernel_forward_from_hub",
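The regex above accepts Hub-style kernel references of the form org/repo, optionally with an @revision and a :function_name suffix; the wrapper|repo variant is then split apart inside load_and_register_kernel. A small demo of the matcher, copied from the diff (the last two repo names are made up for illustration):

import re

def is_kernel(attn_implementation):
    return (
        attn_implementation is not None
        and re.search(r"^[^/:]+/[^/:]+(?:@[^/:]+)?(?::[^/:]+)?$", attn_implementation) is not None
    )

for candidate in [
    "sdpa",                                 # built-in implementation name, not a kernel
    "kernels-community/flash-attn",         # repo only
    "some-org/some-kernels@v1.0.0",         # repo + revision
    "some-org/some-kernels:my_attention",   # repo + function name
]:
    print(candidate, "->", is_kernel(candidate))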
@ -126,10 +126,10 @@ def _lazy_define_process_function(flash_function):
|
||||
|
||||
def lazy_import_flash_attention(implementation: Optional[str]):
|
||||
"""
|
||||
Lazy loading flash attention and returning the respective functions + flags back
|
||||
Lazily import flash attention and return the respective functions + flags.
|
||||
|
||||
NOTE: For fullgraph, this needs to be called before compile while no fullgraph can
|
||||
can work without preloading. See `_check_and_adjust_attn_implementation` in `modeling_utils`.
|
||||
NOTE: For fullgraph, this needs to be called before compile, while no fullgraph can
|
||||
work without preloading. See `load_and_register_kernel` in `integrations.hub_kernels`.
|
||||
"""
|
||||
global _flash_fn, _flash_varlen_fn, _pad_fn, _unpad_fn
|
||||
if any(k is None for k in [_flash_fn, _flash_varlen_fn, _pad_fn, _unpad_fn]):
|
||||
@ -313,17 +313,13 @@ def _upad_input(
|
||||
)
|
||||
|
||||
|
||||
def prepare_fa_kwargs_from_position_ids(position_ids, is_packed_sequence: bool = True):
|
||||
def prepare_fa_kwargs_from_position_ids(position_ids):
|
||||
"""
|
||||
This function returns all the necessary kwargs to call `flash_attn_varlen_func`
|
||||
extracted from position_ids. The `position_ids` can be either packed sequence or
|
||||
the usual padded position ids, for example in inference time.
|
||||
This function returns all the necessary kwargs to call `flash_attn_varlen_func` extracted from position_ids.
|
||||
|
||||
Arguments:
|
||||
position_ids (`torch.Tensor`):
|
||||
Boolean or int tensor of shape (batch_size, sequence_length), 1 means valid and 0 means not valid.
|
||||
is_packed_sequence (`bool`, *optional*, defaults to `True`):
|
||||
Whether the input position ids are a packed sequence or not.
|
||||
|
||||
Return:
|
||||
(cu_seqlens_q, cu_seqlens_k) (`tuple[int]`):
|
||||
@ -333,52 +329,35 @@ def prepare_fa_kwargs_from_position_ids(position_ids, is_packed_sequence: bool =
|
||||
Maximum sequence length in batch (`max_seqlen_in_batch_q` for the target sequence i.e. query,
|
||||
`max_seqlen_in_batch_k` for the source sequence i.e. key/value).
|
||||
"""
|
||||
# If the lengths are not equal, most probably we are in decoding stage with cache
|
||||
# In that case the position ids will not always start with `0` and we need a better way to infer
|
||||
# cumulative seq lengths.
|
||||
tensor_kwargs = {"dtype": torch.int32, "device": position_ids.device}
|
||||
if not is_packed_sequence:
|
||||
last_position_ids = position_ids[:, -1]
|
||||
q_len = (
|
||||
torch.ones(position_ids.size(0), **tensor_kwargs)
|
||||
if position_ids.shape[-1] == 1
|
||||
else last_position_ids.add(1)
|
||||
)
|
||||
cu_seq_lens_q = torch.cat([torch.zeros(1, **tensor_kwargs), q_len.cumsum(0).to(torch.int32)], 0)
|
||||
cu_seq_lens_k = torch.cat(
|
||||
[torch.zeros(1, **tensor_kwargs), last_position_ids.add(1).cumsum(0).to(torch.int32)], 0
|
||||
)
|
||||
|
||||
max_length_q = int(q_len.max())
|
||||
max_length_k = int(last_position_ids.max()) + 1
|
||||
else:
|
||||
position_ids = position_ids.view(-1)
|
||||
indices_q = (position_ids == 0).nonzero().view(-1)
|
||||
position_ids = position_ids.view(-1)
|
||||
indices_q = (position_ids == 0).nonzero().view(-1)
|
||||
|
||||
cu_seq_lens_q = torch.cat(
|
||||
(
|
||||
indices_q.to(**tensor_kwargs),
|
||||
torch.tensor(position_ids.size(), **tensor_kwargs),
|
||||
)
|
||||
cu_seq_lens_q = torch.cat(
|
||||
(
|
||||
indices_q.to(**tensor_kwargs),
|
||||
torch.tensor(position_ids.size(), **tensor_kwargs),
|
||||
)
|
||||
cu_seq_lens_k = cu_seq_lens_q
|
||||
)
|
||||
cu_seq_lens_k = cu_seq_lens_q
|
||||
|
||||
# https://github.com/Dao-AILab/flash-attention/blob/2dd8078adc1d9b74e315ee99718c0dea0de8eeb6/flash_attn/flash_attn_interface.py#L1423-L1424
|
||||
# We should use cu_seq_lens instead of position_ids to get the max length since position_ids is not always increasing
|
||||
# for some models (e.g. qwen2-vl).
|
||||
max_length_q = cu_seq_lens_q.diff().max()
|
||||
# NOTE: With torch compile, this will cause a graph break if you don't set
|
||||
# `TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS=1` in the environment or call
|
||||
# `torch._dynamo.config.capture_scalar_outputs = True` before doing the forward pass.
|
||||
# This is a limitation of flash attention API, as the function `flash_attn_varlen_func`
|
||||
# requires `max_length_q`, `max_length_k` to be passed as `int` and not `torch.Tensor`.
|
||||
max_length_q = max_length_q.item()
|
||||
max_length_k = max_length_q
|
||||
# https://github.com/Dao-AILab/flash-attention/blob/2dd8078adc1d9b74e315ee99718c0dea0de8eeb6/flash_attn/flash_attn_interface.py#L1423-L1424
|
||||
# We should use cu_seq_lens instead of position_ids to get the max length since position_ids is not always increasing
|
||||
# for some models (e.g. qwen2-vl).
|
||||
max_length_q = cu_seq_lens_q.diff().max()
|
||||
# NOTE: With torch compile, this will cause a graph break if you don't set
|
||||
# `TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS=1` in the environment or call
|
||||
# `torch._dynamo.config.capture_scalar_outputs = True` before doing the forward pass.
|
||||
# This is a limitation of flash attention API, as the function `flash_attn_varlen_func`
|
||||
# requires `max_length_q`, `max_length_k` to be passed as `int` and not `torch.Tensor`.
|
||||
max_length_q = max_length_q.item()
|
||||
max_length_k = max_length_q
|
||||
|
||||
return (cu_seq_lens_q, cu_seq_lens_k), (max_length_q, max_length_k)
|
||||
|
||||
|
||||
def _prepare_from_posids(query, key, value, position_ids, query_length):
|
||||
def _prepare_from_posids(query, key, value, position_ids):
|
||||
"""
|
||||
This function returns necessary arguments to call `flash_attn_varlen_func`.
|
||||
All three query, key, value states will be flattened.
|
||||
@ -394,8 +373,6 @@ def _prepare_from_posids(query, key, value, position_ids, query_length):
|
||||
Value state with padding. Shape: (batch_size, kv_seq_len, num_key_value_heads, head_dim).
|
||||
position_ids (`torch.Tensor`):
|
||||
Boolean or int tensor of shape (batch_size, sequence_length), 1 means valid and 0 means not valid.
|
||||
query_length (`int`):
|
||||
Sequence length of the input queries.
|
||||
|
||||
Return:
|
||||
query (`torch.Tensor`):
|
||||
@ -409,16 +386,11 @@ def _prepare_from_posids(query, key, value, position_ids, query_length):
|
||||
(max_seqlen_in_batch_q, max_seqlen_in_batch_k) (`tuple[int]`):
|
||||
Maximum sequence length in batch (`max_seqlen_in_batch_q` for the target sequence i.e. query, `max_seqlen_in_batch_k` for the source sequence i.e. key/value).
|
||||
"""
|
||||
kv_length = key.shape[1]
|
||||
is_packed_sequence = query_length == kv_length
|
||||
|
||||
query = query.contiguous().view(-1, query.size(-2), query.size(-1))
|
||||
key = key.contiguous().view(-1, key.size(-2), key.size(-1))
|
||||
value = value.contiguous().view(-1, value.size(-2), value.size(-1))
|
||||
|
||||
(cu_seq_lens_q, cu_seq_lens_k), (max_length_q, max_length_k) = prepare_fa_kwargs_from_position_ids(
|
||||
position_ids, is_packed_sequence=is_packed_sequence
|
||||
)
|
||||
(cu_seq_lens_q, cu_seq_lens_k), (max_length_q, max_length_k) = prepare_fa_kwargs_from_position_ids(position_ids)
|
||||
|
||||
return (query, key, value, (cu_seq_lens_q, cu_seq_lens_k), (max_length_q, max_length_k))
|
||||
|
||||
@ -660,7 +632,7 @@ def _flash_attention_forward(
|
||||
elif is_fa_with_varlen_kwargs or is_fa_with_position_ids:
|
||||
if cu_seq_lens_q is None or cu_seq_lens_k is None:
|
||||
q, k, v, (cu_seq_lens_q, cu_seq_lens_k), (max_length_q, max_length_k) = _prepare_from_posids(
|
||||
query_states, key_states, value_states, position_ids, query_length=query_length
|
||||
query_states, key_states, value_states, position_ids
|
||||
)
|
||||
else:
|
||||
q = query_states.reshape(-1, query_states.size(-2), query_states.size(-1))
|
||||
|
@ -44,12 +44,6 @@ from torch import Tensor, nn
|
||||
from torch.distributions import constraints
|
||||
from torch.utils.checkpoint import checkpoint
|
||||
|
||||
from transformers.utils import is_torchao_available
|
||||
|
||||
|
||||
if is_torchao_available():
|
||||
from torchao.quantization import Int4WeightOnlyConfig
|
||||
|
||||
from .configuration_utils import PretrainedConfig
|
||||
from .distributed import DistributedConfig
|
||||
from .dynamic_module_utils import custom_object_save
|
||||
@ -61,6 +55,7 @@ from .integrations.eager_paged import eager_paged_attention_forward
|
||||
from .integrations.flash_attention import flash_attention_forward
|
||||
from .integrations.flash_paged import paged_attention_forward
|
||||
from .integrations.flex_attention import flex_attention_forward
|
||||
from .integrations.hub_kernels import is_kernel, load_and_register_kernel
|
||||
from .integrations.sdpa_attention import sdpa_attention_forward
|
||||
from .integrations.sdpa_paged import sdpa_attention_paged_forward
|
||||
from .integrations.tensor_parallel import (
|
||||
@ -73,17 +68,8 @@ from .integrations.tensor_parallel import (
|
||||
verify_tp_plan,
|
||||
)
|
||||
from .loss.loss_utils import LOSS_MAPPING
|
||||
from .masking_utils import ALL_MASK_ATTENTION_FUNCTIONS
|
||||
from .modeling_flash_attention_utils import lazy_import_flash_attention
|
||||
from .pytorch_utils import ( # noqa: F401
|
||||
Conv1D,
|
||||
apply_chunking_to_forward,
|
||||
find_pruneable_heads_and_indices,
|
||||
id_tensor_storage,
|
||||
prune_conv1d_layer,
|
||||
prune_layer,
|
||||
prune_linear_layer,
|
||||
)
|
||||
from .pytorch_utils import id_tensor_storage
|
||||
from .quantizers import HfQuantizer
|
||||
from .quantizers.auto import get_hf_quantizer
|
||||
from .quantizers.quantizers_utils import get_module_from_name
|
||||
@ -124,6 +110,7 @@ from .utils import (
|
||||
is_torch_npu_available,
|
||||
is_torch_xla_available,
|
||||
is_torch_xpu_available,
|
||||
is_torchao_available,
|
||||
logging,
|
||||
)
|
||||
from .utils.generic import _CAN_RECORD_REGISTRY, GeneralInterface, OutputRecorder
|
||||
@ -138,9 +125,8 @@ from .utils.import_utils import (
|
||||
from .utils.quantization_config import BitsAndBytesConfig, QuantizationMethod
|
||||
|
||||
|
||||
XLA_USE_BF16 = os.environ.get("XLA_USE_BF16", "0").upper()
|
||||
XLA_DOWNCAST_BF16 = os.environ.get("XLA_DOWNCAST_BF16", "0").upper()
|
||||
|
||||
if is_torchao_available():
|
||||
from torchao.quantization import Int4WeightOnlyConfig
|
||||
|
||||
if is_accelerate_available():
|
||||
from accelerate import dispatch_model, infer_auto_device_map
|
||||
@ -164,32 +150,14 @@ if is_safetensors_available():
|
||||
from safetensors.torch import load_file as safe_load_file
|
||||
from safetensors.torch import save_file as safe_save_file
|
||||
|
||||
if is_peft_available():
|
||||
from .utils import find_adapter_config_file
|
||||
|
||||
if is_kernels_available():
|
||||
from kernels import get_kernel
|
||||
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
_init_weights = True
|
||||
_is_quantized = False
|
||||
_is_ds_init_called = False
|
||||
_torch_distributed_available = torch.distributed.is_available()
|
||||
|
||||
_is_dtensor_available = _torch_distributed_available and is_torch_greater_or_equal("2.5")
|
||||
if _is_dtensor_available:
|
||||
from torch.distributed.tensor import DTensor
|
||||
|
||||
|
||||
def is_local_dist_rank_0():
|
||||
return (
|
||||
torch.distributed.is_available()
|
||||
and torch.distributed.is_initialized()
|
||||
and int(os.environ.get("LOCAL_RANK", "-1")) == 0
|
||||
)
|
||||
|
||||
|
||||
if is_sagemaker_mp_enabled():
|
||||
import smdistributed.modelparallel.torch as smp
|
||||
from smdistributed.modelparallel import __version__ as SMP_VERSION
|
||||
@ -198,11 +166,24 @@ if is_sagemaker_mp_enabled():
|
||||
else:
|
||||
IS_SAGEMAKER_MP_POST_1_10 = False
|
||||
|
||||
if is_peft_available():
|
||||
from .utils import find_adapter_config_file
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
XLA_USE_BF16 = os.environ.get("XLA_USE_BF16", "0").upper()
|
||||
XLA_DOWNCAST_BF16 = os.environ.get("XLA_DOWNCAST_BF16", "0").upper()
|
||||
SpecificPreTrainedModelType = TypeVar("SpecificPreTrainedModelType", bound="PreTrainedModel")
|
||||
_init_weights = True
|
||||
_is_quantized = False
|
||||
_is_ds_init_called = False
|
||||
|
||||
|
||||
def is_local_dist_rank_0():
|
||||
return (
|
||||
torch.distributed.is_available()
|
||||
and torch.distributed.is_initialized()
|
||||
and int(os.environ.get("LOCAL_RANK", "-1")) == 0
|
||||
)
|
||||
|
||||
|
||||
TORCH_INIT_FUNCTIONS = {
|
||||
"uniform_": nn.init.uniform_,
|
||||
@ -2792,61 +2773,45 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH
|
||||
`str`: The final attention implementation to use, including potential fallbacks from sdpa to eager, or from
|
||||
None to sdpa (to potentially eager).
|
||||
"""
|
||||
# Register kernel if relevant
|
||||
if attn_implementation is not None and re.match(
|
||||
r"^[^/:]+/[^/:]+(?:@[^/:]+)?(?::[^/:]+)?$", attn_implementation
|
||||
applicable_attn_implementation = attn_implementation
|
||||
# If FA not installed, do not fail but use kernels instead
|
||||
if (
|
||||
applicable_attn_implementation == "flash_attention_2"
|
||||
and self._supports_flash_attn
|
||||
and not is_flash_attn_2_available()
|
||||
and is_kernels_available()
|
||||
):
|
||||
if not is_kernels_available():
|
||||
raise ValueError("kernels is not installed. Please install it with `pip install kernels`.")
|
||||
attention_wrapper = None
|
||||
# FIXME: @ArthurZucker this is dirty, did not want to do a lof of extra work
|
||||
actual_attn_name = attn_implementation
|
||||
if "|" in attn_implementation:
|
||||
attention_wrapper, actual_attn_name = attn_implementation.split("|")
|
||||
# `transformers` has wrapper for sdpa, paged, flash, flex etc.
|
||||
attention_wrapper = ALL_ATTENTION_FUNCTIONS.get(attention_wrapper)
|
||||
# Extract repo_id and kernel_name from the string
|
||||
if ":" in actual_attn_name:
|
||||
repo_id, kernel_name = actual_attn_name.split(":")
|
||||
kernel_name = kernel_name.strip()
|
||||
else:
|
||||
repo_id = actual_attn_name
|
||||
kernel_name = None
|
||||
repo_id = repo_id.strip()
|
||||
# extract the rev after the @ if it exists
|
||||
repo_id, _, rev = repo_id.partition("@")
|
||||
repo_id = repo_id.strip()
|
||||
rev = rev.strip() if rev else None
|
||||
applicable_attn_implementation = "kernels-community/flash-attn"
|
||||
if is_kernel(applicable_attn_implementation):
|
||||
try:
|
||||
kernel = get_kernel(repo_id, revision=rev)
|
||||
if hasattr(kernel, "flash_attn_varlen_func"):
|
||||
if attention_wrapper is None:
|
||||
attention_wrapper = flash_attention_forward
|
||||
kernel_function = partial(attention_wrapper, implementation=kernel)
|
||||
lazy_import_flash_attention(kernel)
|
||||
elif kernel_name is not None:
|
||||
kernel_function = getattr(kernel, kernel_name)
|
||||
ALL_ATTENTION_FUNCTIONS.register(attn_implementation, kernel_function)
|
||||
ALL_MASK_ATTENTION_FUNCTIONS.register(
|
||||
attn_implementation, ALL_MASK_ATTENTION_FUNCTIONS["flash_attention_2"]
|
||||
)
|
||||
load_and_register_kernel(applicable_attn_implementation)
|
||||
# log that we used kernel fallback if successful
|
||||
if attn_implementation == "flash_attention_2":
|
||||
logger.warning_once(
|
||||
"You do not have `flash_attn` installed, using `kernels-community/flash-attn` from the `kernels` "
|
||||
"library instead!"
|
||||
)
|
||||
except Exception as e:
|
||||
if attn_implementation == "flash_attention_2":
|
||||
self._flash_attn_2_can_dispatch() # will fail as fa2 is not available but raise the proper exception
|
||||
logger.warning_once(
|
||||
f"Could not find a kernel repository '{repo_id}' compatible with your device in the hub: {e}. Using "
|
||||
"default attention implementation instead (sdpa if available, eager otherwise)."
|
||||
f"Could not find a kernel matching `{applicable_attn_implementation}` compatible with your device in the "
|
||||
f"hub:\n{e}.\nUsing default attention implementation instead (sdpa if available, eager otherwise)."
|
||||
)
|
||||
try:
|
||||
self._sdpa_can_dispatch(is_init_check)
|
||||
attn_implementation = "sdpa"
|
||||
applicable_attn_implementation = "sdpa"
|
||||
except (ValueError, ImportError) as e:
|
||||
attn_implementation = "eager"
|
||||
applicable_attn_implementation = "eager"
|
||||
else:
|
||||
attn_implementation = self.get_correct_attn_implementation(attn_implementation, is_init_check)
|
||||
applicable_attn_implementation = self.get_correct_attn_implementation(
|
||||
applicable_attn_implementation, is_init_check
|
||||
)
|
||||
# preload flash attention here to allow compile with fullgraph
|
||||
if attn_implementation.startswith("flash_attention"):
|
||||
lazy_import_flash_attention(attn_implementation)
|
||||
if applicable_attn_implementation.startswith("flash_attention"):
|
||||
lazy_import_flash_attention(applicable_attn_implementation)
|
||||
|
||||
return attn_implementation
|
||||
return applicable_attn_implementation
|
||||
|
||||
def get_correct_attn_implementation(self, requested_attention: Optional[str], is_init_check: bool = False) -> str:
|
||||
applicable_attention = "sdpa" if requested_attention is None else requested_attention
|
||||
@ -3035,11 +3000,14 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH
|
||||
|
||||
if hasattr(self, "model"):
|
||||
inner = self.model
|
||||
if hasattr(inner, "get_decoder"):
|
||||
# See: https://github.com/huggingface/transformers/issues/40815
|
||||
if hasattr(inner, "get_decoder") and type(inner) is not type(self):
|
||||
return inner.get_decoder()
|
||||
return inner
|
||||
|
||||
return None # raise AttributeError(f"{self.__class__.__name__} has no decoder; override `get_decoder()` if needed.")
|
||||
# If this is a base transformer model (no decoder/model attributes), return self
|
||||
# This handles cases like MistralModel which is itself the decoder
|
||||
return self
|
||||
|
||||
def set_decoder(self, decoder):
|
||||
"""
|
||||
@ -3058,7 +3026,7 @@ class PreTrainedModel(nn.Module, EmbeddingAccessMixin, ModuleUtilsMixin, PushToH
|
||||
self.model = decoder
|
||||
return
|
||||
|
||||
return # raise AttributeError(f"{self.__class__.__name__} cannot accept a decoder; override `set_decoder()`.")
|
||||
return
|
||||
|
||||
def _init_weights(self, module):
|
||||
"""
|
||||
|
@ -1,269 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import gc
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from huggingface_hub import snapshot_download
|
||||
from safetensors import safe_open
|
||||
|
||||
from transformers import (
|
||||
Aimv2Config,
|
||||
Aimv2Model,
|
||||
Aimv2VisionConfig,
|
||||
Aimv2VisionModel,
|
||||
AutoImageProcessor,
|
||||
AutoProcessor,
|
||||
)
|
||||
|
||||
|
||||
ORIGINAL_TO_CONVERTED_KEY_MAPPING_VISION_MODEL = {
|
||||
# Embeddings
|
||||
r"preprocessor.patchifier.proj": r"embeddings.patch_embed",
|
||||
r"preprocessor.pos_embed": r"embeddings.position_embedding.weight",
|
||||
r"preprocessor.patchifier.norm.weight": r"embeddings.rms_norm.weight",
|
||||
# Encoder Layers
|
||||
r"trunk.blocks.(\d+).attn.qkv": r"encoder.layers.\1.attention.qkv",
|
||||
r"trunk.blocks.(\d+).attn.proj": r"encoder.layers.\1.attention.out_proj",
|
||||
r"trunk.blocks.(\d+).mlp.fc1": r"encoder.layers.\1.ffn.gate_proj",
|
||||
r"trunk.blocks.(\d+).mlp.fc2": r"encoder.layers.\1.ffn.down_proj",
|
||||
r"trunk.blocks.(\d+).mlp.fc3": r"encoder.layers.\1.ffn.up_proj",
|
||||
# Normalization Layers
|
||||
r"trunk.blocks.(\d+).norm_1": r"encoder.layers.\1.rms_norm1",
|
||||
r"trunk.blocks.(\d+).norm_2": r"encoder.layers.\1.rms_norm2",
|
||||
# Final Norm
|
||||
r"trunk.post_trunk_norm": r"rms_norm",
|
||||
}
|
||||
|
||||
ORIGINAL_TO_CONVERTED_KEY_MAPPING = {
|
||||
# Vision Embeddings
|
||||
r"image_encoder.preprocessor.patchifier.proj": r"vision_model.embeddings.patch_embed",
|
||||
r"image_encoder.preprocessor.pos_embed": r"vision_model.embeddings.position_embedding.weight",
|
||||
r"image_encoder.preprocessor.patchifier.norm.weight": r"vision_model.embeddings.rms_norm.weight",
|
||||
# Vision Encoder Layers
|
||||
r"image_encoder.trunk.blocks.(\d+).attn.qkv": r"vision_model.encoder.layers.\1.attention.qkv",
|
||||
r"image_encoder.trunk.blocks.(\d+).attn.proj": r"vision_model.encoder.layers.\1.attention.out_proj",
|
||||
r"image_encoder.trunk.blocks.(\d+).mlp.fc1": r"vision_model.encoder.layers.\1.ffn.gate_proj",
|
||||
r"image_encoder.trunk.blocks.(\d+).mlp.fc2": r"vision_model.encoder.layers.\1.ffn.down_proj",
|
||||
r"image_encoder.trunk.blocks.(\d+).mlp.fc3": r"vision_model.encoder.layers.\1.ffn.up_proj",
|
||||
# Normalization Layers
|
||||
r"image_encoder.trunk.blocks.(\d+).norm_1": r"vision_model.encoder.layers.\1.rms_norm1",
|
||||
r"image_encoder.trunk.blocks.(\d+).norm_2": r"vision_model.encoder.layers.\1.rms_norm2",
|
||||
r"image_encoder.trunk.post_trunk_norm": r"vision_model.rms_norm",
|
||||
r"image_projector": r"visual_projection",
|
||||
# Vision Head
|
||||
r"image_encoder.head.cls_token": r"vision_model.head.cls_token",
|
||||
r"image_encoder.head.k": r"vision_model.head.k_proj",
|
||||
r"image_encoder.head.v": r"vision_model.head.v_proj",
|
||||
r"image_encoder.head.linear": r"vision_model.head.output_proj",
|
||||
# Text Embeddings
|
||||
r"text_encoder.preprocessor.text_embedding.weight": r"text_model.embeddings.token_embedding.weight",
|
||||
r"text_encoder.preprocessor.positional_embedding": r"text_model.embeddings.position_embedding.weight",
|
||||
# Text Encoder Layers
|
||||
r"text_encoder.trunk.blocks.(\d+).attn.qkv": r"text_model.encoder.layers.\1.attention.qkv",
|
||||
r"text_encoder.trunk.blocks.(\d+).attn.proj": r"text_model.encoder.layers.\1.attention.out_proj",
|
||||
r"text_encoder.trunk.blocks.(\d+).mlp.fc1": r"text_model.encoder.layers.\1.ffn.gate_proj",
|
||||
r"text_encoder.trunk.blocks.(\d+).mlp.fc2": r"text_model.encoder.layers.\1.ffn.down_proj",
|
||||
r"text_encoder.trunk.blocks.(\d+).mlp.fc3": r"text_model.encoder.layers.\1.ffn.up_proj",
|
||||
# Text Normalization Layers
|
||||
r"text_encoder.trunk.blocks.(\d+).norm_1": r"text_model.encoder.layers.\1.rms_norm1",
|
||||
r"text_encoder.trunk.blocks.(\d+).norm_2": r"text_model.encoder.layers.\1.rms_norm2",
|
||||
r"text_encoder.trunk.post_trunk_norm": r"text_model.rms_norm",
|
||||
r"text_projector": r"text_projection",
|
||||
r"log_logit_scale": r"logit_scale",
|
||||
}
|
||||
|
||||
|
||||
def load_original_state_dict(model_id: str, revision: Optional[str] = None) -> dict[str, torch.Tensor]:
|
||||
# Download only the model.safetensors file
|
||||
directory_path = snapshot_download(
|
||||
repo_id=model_id,
|
||||
revision=revision,
|
||||
allow_patterns=["model.safetensors"],
|
||||
)
|
||||
|
||||
original_state_dict = {}
|
||||
safetensor_path = f"{directory_path}/model.safetensors"
|
||||
|
||||
with safe_open(safetensor_path, framework="pt", device="cpu") as f:
|
||||
for key in f.keys():
|
||||
original_state_dict[key] = f.get_tensor(key)
|
||||
|
||||
return original_state_dict
|
||||
|
||||
|
||||
def convert_old_keys_to_new_keys(state_dict_keys: dict, ORIGINAL_TO_CONVERTED_KEY_MAPPING: dict):
|
||||
"""Converts state dict keys from the old format to the new format."""
|
||||
|
||||
output_dict = {}
|
||||
if state_dict_keys is not None:
|
||||
old_text = "\n".join(state_dict_keys)
|
||||
new_text = old_text
|
||||
for pattern, replacement in ORIGINAL_TO_CONVERTED_KEY_MAPPING.items():
|
||||
if replacement is None:
|
||||
new_text = re.sub(pattern, "", new_text) # an empty line
|
||||
continue
|
||||
new_text = re.sub(pattern, replacement, new_text)
|
||||
output_dict = dict(zip(old_text.split("\n"), new_text.split("\n")))
|
||||
return output_dict
|
||||
|
||||
|
||||
def split_qkv_tensor(key, tensor):
|
||||
"""Splits a qkv tensor into separate q, k, v tensors and updates the key accordingly."""
|
||||
|
||||
new_keys = ["q_proj", "k_proj", "v_proj"]
|
||||
split_size = tensor.shape[0] // 3
|
||||
split_tensors = torch.split(tensor, split_size, dim=0)
|
||||
|
||||
return {key.replace("qkv", new_key): split_tensors[i] for i, new_key in enumerate(new_keys)}
|
||||
|
||||
|
||||
def get_model_config_mapping(model_id: str):
|
||||
"""Determines the correct model, config, and key mappings based on the checkpoint name."""
|
||||
|
||||
if model_id == "apple/aimv2-large-patch14-224-lit":
|
||||
return Aimv2Model, Aimv2Config, ORIGINAL_TO_CONVERTED_KEY_MAPPING
|
||||
else:
|
||||
return Aimv2VisionModel, Aimv2VisionConfig, ORIGINAL_TO_CONVERTED_KEY_MAPPING_VISION_MODEL
|
||||
|
||||
|
||||
def write_model(
|
||||
hf_repo_id: str,
|
||||
output_dir: str,
|
||||
safe_serialization: bool = True,
|
||||
):
|
||||
"""
|
||||
Converts a model checkpoint to Hugging Face format and saves it.
|
||||
|
||||
Args:
|
||||
hf_repo_id (str): The Hugging Face repo ID to load from.
|
||||
output_dir (str): The directory to save the converted model.
|
||||
safe_serialization (bool): Whether to use safe serialization.
|
||||
|
||||
Returns:
|
||||
model: The reloaded Hugging Face model.
|
||||
"""
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Get the appropriate model, config, and key mapping
|
||||
model_class, config_class, key_mapping = get_model_config_mapping(hf_repo_id)
|
||||
|
||||
# Load config and original state dict
|
||||
config = config_class.from_pretrained(hf_repo_id)
|
||||
|
||||
# Checkpoint `apple/aimv2-large-patch14-224-lit` uses AttentionPoolingHead hence set the required attr in config.
|
||||
if hf_repo_id != "apple/aimv2-large-patch14-224-lit":
|
||||
config.use_head = False
|
||||
|
||||
if hf_repo_id == "apple/aimv2-large-patch14-native":
|
||||
config.is_native = True
|
||||
|
||||
original_state_dict = load_original_state_dict(hf_repo_id)
|
||||
|
||||
print("Converting model...")
|
||||
|
||||
state_dict = {}
|
||||
result = convert_old_keys_to_new_keys(original_state_dict, key_mapping)
|
||||
all_keys = list(original_state_dict.keys())
|
||||
|
||||
for key in all_keys:
|
||||
value = original_state_dict[key]
|
||||
new_key = result.pop(key)
|
||||
|
||||
if "qkv" in new_key:
|
||||
qkv_state_dict = split_qkv_tensor(new_key, value)
|
||||
state_dict.update(qkv_state_dict)
|
||||
else:
|
||||
state_dict[new_key] = value
|
||||
|
||||
# Check if position embeddings exist before squeezing
|
||||
if new_key.endswith("position_embedding.weight"):
|
||||
state_dict[new_key] = value.squeeze(0)
|
||||
|
||||
print(f"Loading the checkpoint in a {model_class.__name__}.")
|
||||
model = model_class(config)
|
||||
model.load_state_dict(state_dict, strict=True, assign=True)
|
||||
print("Checkpoint loaded successfully.")
|
||||
|
||||
print("Saving the model.")
|
||||
model.save_pretrained(output_dir, safe_serialization=safe_serialization)
|
||||
del state_dict, model
|
||||
gc.collect()
|
||||
|
||||
print("Reloading the model to check if it's saved correctly.")
|
||||
model = model_class.from_pretrained(output_dir, device_map="auto")
|
||||
print("Model reloaded successfully.")
|
||||
return model
|
||||
|
||||
|
||||
def write_image_processor(hf_repo_id: str, output_dir: str):
|
||||
if hf_repo_id == "apple/aimv2-large-patch14-224-lit":
|
||||
image_processor = AutoProcessor.from_pretrained(hf_repo_id, use_fast=True)
|
||||
else:
|
||||
image_processor = AutoImageProcessor.from_pretrained(hf_repo_id, use_fast=True)
|
||||
image_processor.save_pretrained(output_dir)
|
||||
return image_processor
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--hf_repo_id",
|
||||
default="apple/aimv2-large-patch14-224",
|
||||
help="Location of official weights from apple on HF",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output_dir",
|
||||
default="aimv2_model",
|
||||
help="Location to write the converted model and processor",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--safe_serialization", default=True, type=bool, help="Whether or not to save using `safetensors`."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--push_to_hub",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
help="Whether or not to push the converted model to the huggingface hub.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hub_repo_id",
|
||||
default=None,
|
||||
help="Huggingface hub repo to write the converted model and processor",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
model = write_model(
|
||||
hf_repo_id=args.hf_repo_id,
|
||||
output_dir=args.output_dir,
|
||||
safe_serialization=args.safe_serialization,
|
||||
)
|
||||
|
||||
image_processor = write_image_processor(
|
||||
hf_repo_id=args.hf_repo_id,
|
||||
output_dir=args.output_dir,
|
||||
)
|
||||
|
||||
if args.push_to_hub:
|
||||
print("Pushing to hub...")
|
||||
model.push_to_hub(args.hub_repo_id)
|
||||
image_processor.push_to_hub(args.hub_repo_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -613,11 +613,11 @@ class Aimv2TextModel(Aimv2PreTrainedModel):


@auto_docstring
class Aimv2Model(CLIPModel, nn.Module):
class Aimv2Model(CLIPModel):
_supports_flash_attn = True

def __init__(self, config: Aimv2Config):
nn.Module().__init__(config)
PreTrainedModel.__init__(self, config)

self.projection_dim = config.projection_dim
self.vision_embed_dim = config.vision_config.hidden_size
@ -1,62 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Convert ALBERT checkpoint."""
|
||||
|
||||
import argparse
|
||||
|
||||
import torch
|
||||
|
||||
from ...utils import logging
|
||||
from . import AlbertConfig, AlbertForPreTraining, load_tf_weights_in_albert
|
||||
|
||||
|
||||
logging.set_verbosity_info()
|
||||
|
||||
|
||||
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file, pytorch_dump_path):
|
||||
# Initialise PyTorch model
|
||||
config = AlbertConfig.from_json_file(albert_config_file)
|
||||
print(f"Building PyTorch model from configuration: {config}")
|
||||
model = AlbertForPreTraining(config)
|
||||
|
||||
# Load weights from tf checkpoint
|
||||
load_tf_weights_in_albert(model, config, tf_checkpoint_path)
|
||||
|
||||
# Save pytorch-model
|
||||
print(f"Save PyTorch model to {pytorch_dump_path}")
|
||||
torch.save(model.state_dict(), pytorch_dump_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
# Required parameters
|
||||
parser.add_argument(
|
||||
"--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--albert_config_file",
|
||||
default=None,
|
||||
type=str,
|
||||
required=True,
|
||||
help=(
|
||||
"The config json file corresponding to the pre-trained ALBERT model. \n"
|
||||
"This specifies the model architecture."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.albert_config_file, args.pytorch_dump_path)
|
@ -1,389 +0,0 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2023 The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Convert ALIGN checkpoints from the original repository."""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import align
|
||||
import numpy as np
|
||||
import requests
|
||||
import tensorflow as tf
|
||||
import torch
|
||||
from PIL import Image
|
||||
from tokenizer import Tokenizer
|
||||
|
||||
from transformers import (
|
||||
AlignConfig,
|
||||
AlignModel,
|
||||
AlignProcessor,
|
||||
BertConfig,
|
||||
BertTokenizer,
|
||||
EfficientNetConfig,
|
||||
EfficientNetImageProcessor,
|
||||
)
|
||||
from transformers.utils import logging
|
||||
|
||||
|
||||
logging.set_verbosity_info()
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
def preprocess(image):
|
||||
image = tf.image.resize(image, (346, 346))
|
||||
image = tf.image.crop_to_bounding_box(image, (346 - 289) // 2, (346 - 289) // 2, 289, 289)
|
||||
return image
|
||||
|
||||
|
||||
def get_align_config():
|
||||
vision_config = EfficientNetConfig.from_pretrained("google/efficientnet-b7")
|
||||
vision_config.image_size = 289
|
||||
vision_config.hidden_dim = 640
|
||||
vision_config.id2label = {"0": "LABEL_0", "1": "LABEL_1"}
|
||||
vision_config.label2id = {"LABEL_0": 0, "LABEL_1": 1}
|
||||
vision_config.depthwise_padding = []
|
||||
|
||||
text_config = BertConfig()
|
||||
config = AlignConfig.from_text_vision_configs(
|
||||
text_config=text_config, vision_config=vision_config, projection_dim=640
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
# We will verify our results on an image of cute cats
|
||||
def prepare_img():
|
||||
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
||||
im = Image.open(requests.get(url, stream=True).raw)
|
||||
return im
|
||||
|
||||
|
||||
def get_processor():
|
||||
image_processor = EfficientNetImageProcessor(
|
||||
do_center_crop=True,
|
||||
rescale_factor=1 / 127.5,
|
||||
rescale_offset=True,
|
||||
do_normalize=False,
|
||||
include_top=False,
|
||||
resample=Image.BILINEAR,
|
||||
)
|
||||
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
|
||||
tokenizer.model_max_length = 64
|
||||
processor = AlignProcessor(image_processor=image_processor, tokenizer=tokenizer)
|
||||
return processor
|
||||
|
||||
|
||||
# here we list all keys to be renamed (original name on the left, our name on the right)
|
||||
def rename_keys(original_param_names):
|
||||
# EfficientNet image encoder
|
||||
block_names = [v.split("_")[0].split("block")[1] for v in original_param_names if v.startswith("block")]
|
||||
block_names = list(set(block_names))
|
||||
block_names = sorted(block_names)
|
||||
num_blocks = len(block_names)
|
||||
block_name_mapping = {b: str(i) for b, i in zip(block_names, range(num_blocks))}
|
||||
|
||||
rename_keys = []
|
||||
rename_keys.append(("stem_conv/kernel:0", "embeddings.convolution.weight"))
|
||||
rename_keys.append(("stem_bn/gamma:0", "embeddings.batchnorm.weight"))
|
||||
rename_keys.append(("stem_bn/beta:0", "embeddings.batchnorm.bias"))
|
||||
rename_keys.append(("stem_bn/moving_mean:0", "embeddings.batchnorm.running_mean"))
|
||||
rename_keys.append(("stem_bn/moving_variance:0", "embeddings.batchnorm.running_var"))
|
||||
|
||||
for b in block_names:
|
||||
hf_b = block_name_mapping[b]
|
||||
rename_keys.append((f"block{b}_expand_conv/kernel:0", f"encoder.blocks.{hf_b}.expansion.expand_conv.weight"))
|
||||
rename_keys.append((f"block{b}_expand_bn/gamma:0", f"encoder.blocks.{hf_b}.expansion.expand_bn.weight"))
|
||||
rename_keys.append((f"block{b}_expand_bn/beta:0", f"encoder.blocks.{hf_b}.expansion.expand_bn.bias"))
|
||||
rename_keys.append(
|
||||
(f"block{b}_expand_bn/moving_mean:0", f"encoder.blocks.{hf_b}.expansion.expand_bn.running_mean")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"block{b}_expand_bn/moving_variance:0", f"encoder.blocks.{hf_b}.expansion.expand_bn.running_var")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"block{b}_dwconv/depthwise_kernel:0", f"encoder.blocks.{hf_b}.depthwise_conv.depthwise_conv.weight")
|
||||
)
|
||||
rename_keys.append((f"block{b}_bn/gamma:0", f"encoder.blocks.{hf_b}.depthwise_conv.depthwise_norm.weight"))
|
||||
rename_keys.append((f"block{b}_bn/beta:0", f"encoder.blocks.{hf_b}.depthwise_conv.depthwise_norm.bias"))
|
||||
rename_keys.append(
|
||||
(f"block{b}_bn/moving_mean:0", f"encoder.blocks.{hf_b}.depthwise_conv.depthwise_norm.running_mean")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"block{b}_bn/moving_variance:0", f"encoder.blocks.{hf_b}.depthwise_conv.depthwise_norm.running_var")
|
||||
)
|
||||
|
||||
rename_keys.append((f"block{b}_se_reduce/kernel:0", f"encoder.blocks.{hf_b}.squeeze_excite.reduce.weight"))
|
||||
rename_keys.append((f"block{b}_se_reduce/bias:0", f"encoder.blocks.{hf_b}.squeeze_excite.reduce.bias"))
|
||||
rename_keys.append((f"block{b}_se_expand/kernel:0", f"encoder.blocks.{hf_b}.squeeze_excite.expand.weight"))
|
||||
rename_keys.append((f"block{b}_se_expand/bias:0", f"encoder.blocks.{hf_b}.squeeze_excite.expand.bias"))
|
||||
rename_keys.append(
|
||||
(f"block{b}_project_conv/kernel:0", f"encoder.blocks.{hf_b}.projection.project_conv.weight")
|
||||
)
|
||||
rename_keys.append((f"block{b}_project_bn/gamma:0", f"encoder.blocks.{hf_b}.projection.project_bn.weight"))
|
||||
rename_keys.append((f"block{b}_project_bn/beta:0", f"encoder.blocks.{hf_b}.projection.project_bn.bias"))
|
||||
rename_keys.append(
|
||||
(f"block{b}_project_bn/moving_mean:0", f"encoder.blocks.{hf_b}.projection.project_bn.running_mean")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"block{b}_project_bn/moving_variance:0", f"encoder.blocks.{hf_b}.projection.project_bn.running_var")
|
||||
)
|
||||
|
||||
key_mapping = {}
|
||||
for item in rename_keys:
|
||||
if item[0] in original_param_names:
|
||||
key_mapping[item[0]] = "vision_model." + item[1]
|
||||
|
||||
# BERT text encoder
|
||||
rename_keys = []
|
||||
old = "tf_bert_model/bert"
|
||||
new = "text_model"
|
||||
for i in range(12):
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/self/query/kernel:0",
|
||||
f"{new}.encoder.layer.{i}.attention.self.query.weight",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/self/query/bias:0",
|
||||
f"{new}.encoder.layer.{i}.attention.self.query.bias",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/self/key/kernel:0",
|
||||
f"{new}.encoder.layer.{i}.attention.self.key.weight",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/self/key/bias:0",
|
||||
f"{new}.encoder.layer.{i}.attention.self.key.bias",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/self/value/kernel:0",
|
||||
f"{new}.encoder.layer.{i}.attention.self.value.weight",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/self/value/bias:0",
|
||||
f"{new}.encoder.layer.{i}.attention.self.value.bias",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/output/dense/kernel:0",
|
||||
f"{new}.encoder.layer.{i}.attention.output.dense.weight",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/output/dense/bias:0",
|
||||
f"{new}.encoder.layer.{i}.attention.output.dense.bias",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/output/LayerNorm/gamma:0",
|
||||
f"{new}.encoder.layer.{i}.attention.output.LayerNorm.weight",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/attention/output/LayerNorm/beta:0",
|
||||
f"{new}.encoder.layer.{i}.attention.output.LayerNorm.bias",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/intermediate/dense/kernel:0",
|
||||
f"{new}.encoder.layer.{i}.intermediate.dense.weight",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(
|
||||
f"{old}/encoder/layer_._{i}/intermediate/dense/bias:0",
|
||||
f"{new}.encoder.layer.{i}.intermediate.dense.bias",
|
||||
)
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"{old}/encoder/layer_._{i}/output/dense/kernel:0", f"{new}.encoder.layer.{i}.output.dense.weight")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"{old}/encoder/layer_._{i}/output/dense/bias:0", f"{new}.encoder.layer.{i}.output.dense.bias")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"{old}/encoder/layer_._{i}/output/LayerNorm/gamma:0", f"{new}.encoder.layer.{i}.output.LayerNorm.weight")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"{old}/encoder/layer_._{i}/output/LayerNorm/beta:0", f"{new}.encoder.layer.{i}.output.LayerNorm.bias")
|
||||
)
|
||||
|
||||
rename_keys.append((f"{old}/embeddings/word_embeddings/weight:0", f"{new}.embeddings.word_embeddings.weight"))
|
||||
rename_keys.append(
|
||||
(f"{old}/embeddings/position_embeddings/embeddings:0", f"{new}.embeddings.position_embeddings.weight")
|
||||
)
|
||||
rename_keys.append(
|
||||
(f"{old}/embeddings/token_type_embeddings/embeddings:0", f"{new}.embeddings.token_type_embeddings.weight")
|
||||
)
|
||||
rename_keys.append((f"{old}/embeddings/LayerNorm/gamma:0", f"{new}.embeddings.LayerNorm.weight"))
|
||||
rename_keys.append((f"{old}/embeddings/LayerNorm/beta:0", f"{new}.embeddings.LayerNorm.bias"))
|
||||
|
||||
rename_keys.append((f"{old}/pooler/dense/kernel:0", f"{new}.pooler.dense.weight"))
|
||||
rename_keys.append((f"{old}/pooler/dense/bias:0", f"{new}.pooler.dense.bias"))
|
||||
rename_keys.append(("dense/kernel:0", "text_projection.weight"))
|
||||
rename_keys.append(("dense/bias:0", "text_projection.bias"))
|
||||
rename_keys.append(("dense/bias:0", "text_projection.bias"))
|
||||
rename_keys.append(("temperature:0", "temperature"))
|
||||
|
||||
for item in rename_keys:
|
||||
if item[0] in original_param_names:
|
||||
key_mapping[item[0]] = item[1]
|
||||
return key_mapping
|
||||
|
||||
|
||||
def replace_params(hf_params, tf_params, key_mapping):
|
||||
list(hf_params.keys())
|
||||
|
||||
for key, value in tf_params.items():
|
||||
if key not in key_mapping:
|
||||
continue
|
||||
|
||||
hf_key = key_mapping[key]
|
||||
if "_conv" in key and "kernel" in key:
|
||||
new_hf_value = torch.from_numpy(value).permute(3, 2, 0, 1)
|
||||
elif "embeddings" in key:
|
||||
new_hf_value = torch.from_numpy(value)
|
||||
elif "depthwise_kernel" in key:
|
||||
new_hf_value = torch.from_numpy(value).permute(2, 3, 0, 1)
|
||||
elif "kernel" in key:
|
||||
new_hf_value = torch.from_numpy(np.transpose(value))
|
||||
elif "temperature" in key:
|
||||
new_hf_value = value
|
||||
elif "bn/gamma" in key or "bn/beta" in key:
|
||||
new_hf_value = torch.from_numpy(np.transpose(value)).squeeze()
|
||||
else:
|
||||
new_hf_value = torch.from_numpy(value)
|
||||
|
||||
# Replace HF parameters with original TF model parameters
|
||||
hf_params[hf_key].copy_(new_hf_value)
|
||||
|
||||
|
||||
@torch.no_grad()
def convert_align_checkpoint(checkpoint_path, pytorch_dump_folder_path, save_model, push_to_hub):
    """
    Copy/paste/tweak model's weights to our ALIGN structure.
    """
    # Load original model
    seq_length = 64
    tok = Tokenizer(seq_length)
    original_model = align.Align("efficientnet-b7", "bert-base", 640, seq_length, tok.get_vocab_size())
    original_model.compile()
    original_model.load_weights(checkpoint_path)

    tf_params = original_model.trainable_variables
    tf_non_train_params = original_model.non_trainable_variables
    tf_params = {param.name: param.numpy() for param in tf_params}
    for param in tf_non_train_params:
        tf_params[param.name] = param.numpy()
    tf_param_names = list(tf_params.keys())

    # Load HuggingFace model
    config = get_align_config()
    hf_model = AlignModel(config).eval()
    hf_params = hf_model.state_dict()

    # Create src-to-dst parameter name mapping dictionary
    print("Converting parameters...")
    key_mapping = rename_keys(tf_param_names)
    replace_params(hf_params, tf_params, key_mapping)

    # Initialize processor
    processor = get_processor()
    inputs = processor(
        images=prepare_img(), text="A picture of a cat", padding="max_length", max_length=64, return_tensors="pt"
    )

    # HF model inference
    hf_model.eval()
    with torch.no_grad():
        outputs = hf_model(**inputs)

    hf_image_features = outputs.image_embeds.detach().numpy()
    hf_text_features = outputs.text_embeds.detach().numpy()

    # Original model inference
    original_model.trainable = False
    tf_image_processor = EfficientNetImageProcessor(
        do_center_crop=True,
        do_rescale=False,
        do_normalize=False,
        include_top=False,
        resample=Image.BILINEAR,
    )
    image = tf_image_processor(images=prepare_img(), return_tensors="tf", data_format="channels_last")["pixel_values"]
    text = tok(tf.constant(["A picture of a cat"]))

    image_features = original_model.image_encoder(image, training=False)
    text_features = original_model.text_encoder(text, training=False)

    image_features = tf.nn.l2_normalize(image_features, axis=-1)
    text_features = tf.nn.l2_normalize(text_features, axis=-1)

    # Check whether original and HF model outputs match -> np.allclose
    if not np.allclose(image_features, hf_image_features, atol=1e-3):
        raise ValueError("The predicted image features are not the same.")
    if not np.allclose(text_features, hf_text_features, atol=1e-3):
        raise ValueError("The predicted text features are not the same.")
    print("Model outputs match!")

    if save_model:
        # Create folder to save model
        if not os.path.isdir(pytorch_dump_folder_path):
            os.mkdir(pytorch_dump_folder_path)
        # Save converted model and image processor
        hf_model.save_pretrained(pytorch_dump_folder_path)
        processor.save_pretrained(pytorch_dump_folder_path)

    if push_to_hub:
        # Push model and image processor to hub
        print("Pushing converted ALIGN to the hub...")
        processor.push_to_hub("align-base")
        hf_model.push_to_hub("align-base")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument(
        "--checkpoint_path",
        default="./weights/model-weights",
        type=str,
        help="Path to the pretrained TF ALIGN checkpoint.",
    )
    parser.add_argument(
        "--pytorch_dump_folder_path",
        default="hf_model",
        type=str,
        help="Path to the output PyTorch model directory.",
    )
    parser.add_argument("--save_model", action="store_true", help="Save model to local")
    parser.add_argument("--push_to_hub", action="store_true", help="Push model and image processor to the hub")

    args = parser.parse_args()
    convert_align_checkpoint(args.checkpoint_path, args.pytorch_dump_folder_path, args.save_model, args.push_to_hub)

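# Example invocation (a sketch: the script filename and local weight path are assumptions,
# the flags come from the argparse definitions above):
#
#   python convert_align_tf_to_hf.py \
#       --checkpoint_path ./weights/model-weights \
#       --pytorch_dump_folder_path hf_model \
#       --save_model
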
32
src/transformers/models/apertus/__init__.py
Normal file
@ -0,0 +1,32 @@
# coding=utf-8
# Copyright 2025 The HuggingFace Inc. team and the Swiss AI Initiative. All rights reserved.
#
# This code is based on HuggingFace's LLaMA implementation in this library.
# It has been modified from its original forms to accommodate the architectural
# differences made by the Swiss AI Initiative that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure


if TYPE_CHECKING:
    from .configuration_apertus import *
    from .modeling_apertus import *
else:
    import sys

    _file = globals()["__file__"]
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)

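# Note (illustrative, not part of the diff): the `_LazyModule` wrapper above defers the
# import of `configuration_apertus` and `modeling_apertus` until an attribute is first
# accessed, keeping `import transformers` cheap. A minimal sketch of the deferred access
# (`ApertusConfig` is assumed from the usual <ModelName>Config naming convention):
#
#   import transformers.models.apertus as apertus  # no model code imported yet
#   config_cls = apertus.ApertusConfig              # first access triggers the real import
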