Mirror of https://github.com/huggingface/transformers.git, synced 2025-10-21 01:23:56 +08:00

Compare commits: v4.52.1 ... change_bui (16 commits)

38a8ce45eb
0a246a5a5d
0d72f20449
1cb4b7fb6a
f3fb6164f2
2b8774a7c3
adeb8cddf1
148e3159d4
cc76a4f113
0202f862ae
6829936ee0
e288ee00d8
711d78d104
feec294dea
cb513e35f9
f4ef41c45e
.github/workflows/check_failed_model_tests.yml (vendored, 51 changed lines)

@@ -39,55 +39,100 @@ jobs:

          name: ci_results_run_models_gpu
          path: /transformers/ci_results_run_models_gpu

      - name: Check file
        working-directory: /transformers
        run: |
          if [ -f ci_results_run_models_gpu/new_model_failures.json ]; then
            echo "`ci_results_run_models_gpu/new_model_failures.json` exists, continue ..."
            echo "process=true" >> $GITHUB_ENV
          else
            echo "`ci_results_run_models_gpu/new_model_failures.json` doesn't exist, abort."
            echo "process=false" >> $GITHUB_ENV
          fi

      - uses: actions/download-artifact@v4
        if: ${{ env.process == 'true' }}
        with:
          pattern: setup_values*
          path: setup_values
          merge-multiple: true

      - name: Prepare some setup values
        if: ${{ env.process == 'true' }}
        run: |
          if [ -f setup_values/prev_workflow_run_id.txt ]; then
            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

          if [ -f setup_values/other_workflow_run_id.txt ]; then
            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

      - name: Update clone
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ github.sha }}

      - name: Get target commit
        working-directory: /transformers/utils
        if: ${{ env.process == 'true' }}
        run: |
-         echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV
+         echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV

      - name: Checkout to `start_sha`
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ inputs.start_sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
        if: ${{ env.process == 'true' }}
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: pip freeze

      - name: Check failed tests
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json

      - name: Show results
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          ls -l new_model_failures_with_bad_commit.json
          cat new_model_failures_with_bad_commit.json

      - name: Checkout back
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        run: |
          git checkout ${{ inputs.start_sha }}

      - name: Process report
        shell: bash
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          python3 utils/process_bad_commit_report.py

@@ -95,7 +140,9 @@ jobs:

      - name: Process report
        shell: bash
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
        run: |
          {

@@ -105,7 +152,7 @@ jobs:

          } >> "$GITHUB_ENV"

      - name: Send processed report
-       if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
+       if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          # Slack channel id, channel name, or user id to post message.
.github/workflows/self-scheduled-caller.yml (vendored, 35 changed lines)

@@ -8,8 +8,43 @@ on:

  push:
    branches:
      - run_scheduled_ci*
  workflow_dispatch:
    inputs:
      prev_workflow_run_id:
        description: 'previous workflow run id to compare'
        type: string
        required: false
        default: ""
      other_workflow_run_id:
        description: 'other workflow run id to compare'
        type: string
        required: false
        default: ""


# Used for `push` to easily modiffy the target workflow runs to compare against
env:
  prev_workflow_run_id: ""
  other_workflow_run_id: ""


jobs:
  setup:
    name: Setup
    runs-on: ubuntu-22.04
    steps:
      - name: Setup
        run: |
          mkdir "setup_values"
          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: setup_values
          path: setup_values

  model-ci:
    name: Model CI
    uses: ./.github/workflows/self-scheduled.yml
.github/workflows/slack-report.yml (vendored, 18 changed lines)

@@ -39,6 +39,21 @@ jobs:

      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4

      - name: Prepare some setup values
        run: |
          if [ -f setup_values/prev_workflow_run_id.txt ]; then
            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

          if [ -f setup_values/other_workflow_run_id.txt ]; then
            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
          else
            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
          fi

      - name: Send message to Slack
        if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
        env:

@@ -50,7 +65,6 @@ jobs:

          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: ${{ inputs.ci_event }}
          CI_SHA: ${{ github.sha }}
          CI_WORKFLOW_REF: ${{ github.workflow_ref }}
          CI_TEST_JOB: ${{ inputs.job }}
          SETUP_STATUS: ${{ inputs.setup_status }}
          # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change

@@ -58,7 +72,6 @@ jobs:

          # For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
          # empty string, and the called script still get one argument (which is the emtpy string).
        run: |
          sudo apt-get install -y curl
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk

@@ -86,7 +99,6 @@ jobs:

          # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
          # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
        run: |
          sudo apt-get install -y curl
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
@@ -455,6 +455,8 @@

      title: Falcon
    - local: model_doc/falcon3
      title: Falcon3
+   - local: model_doc/falcon_h1
+     title: FalconH1
    - local: model_doc/falcon_mamba
      title: FalconMamba
    - local: model_doc/flan-t5
docs/source/en/model_doc/falcon_h1.md (new file, 65 lines)

@@ -0,0 +1,65 @@

<!--Copyright 2025 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.

⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->

# FalconH1

## Overview

The FalconH1 model was developed by the TII Pretraining team. A comprehensive research paper covering the architecture, pretraining dynamics, experimental results, and conclusions is forthcoming. You can read more about this series in [this website](https://github.com/tiiuae/Falcon-H1).

## Contributors

This model was contributed by [DhiyaEddine](https://huggingface.co/DhiyaEddine), [ybelkada](https://huggingface.co/ybelkada), [JingweiZuo](https://huggingface.co/JingweiZuo), [IlyasChahed](https://huggingface.co/IChahed), and [MaksimVelikanov](https://huggingface.co/yellowvm).
The original code can be found [here](https://github.com/tiiuae/Falcon-H1).

## FalconH1Config

| Model     | Depth | Dim  | Attn Heads | KV | Mamba Heads | d_head    | d_state | Ctx Len     |
|-----------|-------|------|------------|----|-------------|-----------|---------|-------------|
| H1 0.5B   | 36    | 1024 | 8          | 2  | 24          | 64 / 64   | 128     | 4K, 16K-SFT |
| H1 1.5B   | 24    | 2048 | 8          | 2  | 48          | 128 / 64  | 256     | 128K        |
| H1 1.5B-d | 66    | 1280 | 6          | 2  | 24          | 128 / 64  | 256     | 128K        |
| H1 3B     | 32    | 2560 | 10         | 2  | 32          | 128 / 128 | 256     | 128K        |
| H1 7B     | 44    | 3072 | 12         | 2  | 24          | 128 / 128 | 256     | 256K        |
| H1 34B    | 72    | 5120 | 20         | 4  | 32          | 128 / 128 | 256     | 256K        |

[[autodoc]] FalconH1Config
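As a quick illustration (not part of the diff): loading the new configuration through the Auto API. The checkpoint name comes from the generation example further down; the printed attribute names are assumptions based on the usual Transformers config layout.

```python
from transformers import AutoConfig

# The auto-mapping hunks later on this page register model_type "falcon_h1".
config = AutoConfig.from_pretrained("tiiuae/Falcon-H1-7B-Instruct")
print(config.model_type)         # expected: "falcon_h1"
print(config.num_hidden_layers)  # depth, cf. the table above (assumed attribute name)
print(config.hidden_size)        # model dim (assumed attribute name)
```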
<!---
## Usage Tips

Tips:

- The architecture is based on Mamba-2 models.

## FalconH1Model

[[autodoc]] FalconH1Model
    - forward
-->

## FalconH1ForCausalLM

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("tiiuae/Falcon-H1-7B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon-H1-7B-Instruct")

message = ["Mamba is a snake with following properties "]
inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
response = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
```

[[autodoc]] FalconH1ForCausalLM
    - forward

This HF implementation is contributed by [younesbelkada](https://github.com/younesbelkada) and [DhiaEddineRhaiem](https://github.com/dhiaEddineRhaiem).
@@ -1,3 +1,16 @@

# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""":
This script is used to test training a model using Tensor Parallelism and Data Parallelism.
The remaining hunks in this block apply the same one-line version bump across the example scripts (Flax, PyTorch, and TensorFlow examples):

@@ (repeated once per example script)

# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
- check_min_version("4.52.0.dev0")
+ check_min_version("4.53.0.dev0")

The surrounding context lines identify the affected suites through their `require_version` calls and requirements paths: examples/flax/speech-recognition; the examples/pytorch audio-classification, contrastive-image-text, image-classification, image-pretraining, instance-segmentation, language-modeling, object-detection, question-answering, semantic-segmentation, speech-recognition, summarization, text-classification, token-classification, and translation scripts; and the examples/tensorflow contrastive-image-text, image-classification, summarization, and text-classification scripts.

One additional script in this range also receives the same license header and docstring shown above:

@@ -1,3 +1,16 @@

# Copyright 2024 The HuggingFace Team. All rights reserved.
# (thirteen-line Apache-2.0 license header, identical to the hunk above)
""":
This script is used to test training a model using Tensor Parallelism and Data Parallelism.
setup.py (2 changed lines)

@@ -451,7 +451,7 @@ install_requires = [

setup(
    name="transformers",
-   version="4.52.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+   version="4.53.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
    author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
    author_email="transformers@huggingface.co",
    description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
@@ -18,7 +18,7 @@

# to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
# in the namespace without actually importing anything (and especially none of the backends).

- __version__ = "4.52.0.dev0"
+ __version__ = "4.53.0.dev0"

from pathlib import Path
from typing import TYPE_CHECKING
@@ -89,8 +89,13 @@ class TextDataset(Dataset):

            tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))

            for i in range(0, len(tokenized_text) - block_size + 1, block_size):  # Truncate in block of block_size
+               token_block = tokenized_text[i : i + block_size]
                self.examples.append(
-                   tokenizer.build_inputs_with_special_tokens(tokenized_text[i : i + block_size])
+                   tokenizer.encode(
+                       tokenizer.decode(token_block),
+                       add_special_tokens=True,
+                       truncation=True
+                   )
                )
            # Note that we are losing the last truncated example here for the sake of simplicity (no padding)
            # If your dataset is small, first you should look for a bigger one :-) and second you

@@ -321,7 +326,7 @@ class LineByLineWithSOPTextDataset(Dataset):

                raise ValueError(f"Length of sequence b is {len(tokens_b)} which must be no less than 1")

            # add special tokens
-           input_ids = tokenizer.build_inputs_with_special_tokens(tokens_a, tokens_b)
+           input_ids = tokenizer(tokenizer.decode(tokens_a), tokenizer.decode(tokens_b))['input_ids']
            # add token type ids, 0 for sentence a, 1 for sentence b
            token_type_ids = tokenizer.create_token_type_ids_from_sequences(tokens_a, tokens_b)

@@ -506,8 +511,7 @@ class TextDatasetForNextSentencePrediction(Dataset):

                raise ValueError(f"Length of sequence b is {len(tokens_b)} which must be no less than 1")

            # add special tokens
-           input_ids = self.tokenizer.build_inputs_with_special_tokens(tokens_a, tokens_b)
-           # add token type ids, 0 for sentence a, 1 for sentence b
+           input_ids = self.tokenizer(self.tokenizer.decode(tokens_a), self.tokenizer.decode(tokens_b))['input_ids']  # add token type ids, 0 for sentence a, 1 for sentence b
            token_type_ids = self.tokenizer.create_token_type_ids_from_sequences(tokens_a, tokens_b)

            example = {
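For context (not part of the diff), a hedged sketch of what the decode-then-re-encode pattern above does: re-encoding the decoded block lets the tokenizer's regular encode path add the special tokens, instead of calling the removed `build_inputs_with_special_tokens` helper. The checkpoint and printed ids are illustrative.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# A block of already-tokenized ids, as produced inside TextDataset above.
token_block = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("hello world"))

# Round trip: decode back to text, then re-encode so special tokens get added.
with_special = tokenizer.encode(
    tokenizer.decode(token_block),
    add_special_tokens=True,
    truncation=True,
)
print(with_special)  # e.g. [101, 7592, 2088, 102] -> "[CLS] hello world [SEP]"
```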
@@ -1985,7 +1985,9 @@ class GenerationMixin:

        instantiated, writes it to `model_kwargs`, under the name expected by the model.
        """

-       cache_name = "past_key_values" if "mamba" not in self.__class__.__name__.lower() else "cache_params"
+       is_hybrid_cache = any(class_name in self.__class__.__name__.lower() for class_name in ["mamba", "falconh1"])
+       cache_name = "past_key_values" if not is_hybrid_cache else "cache_params"

        requires_cross_attention_cache = (
            self.config.is_encoder_decoder or model_kwargs.get("encoder_outputs") is not None
        )
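A hedged restatement of the selection logic above as a standalone helper (the function name is illustrative, not part of the diff): models whose class name contains "mamba" or "falconh1" keep their recurrent state under `cache_params` rather than `past_key_values`.

```python
def _cache_kwarg_name(model) -> str:
    # Mirrors the hunk above: hybrid/recurrent architectures (Mamba-style, FalconH1)
    # expose `cache_params`; standard attention models expose `past_key_values`.
    name = model.__class__.__name__.lower()
    is_hybrid_cache = any(key in name for key in ("mamba", "falconh1"))
    return "cache_params" if is_hybrid_cache else "past_key_values"
```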
@@ -4177,13 +4177,14 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, PushToHubMixin, PeftAdapterMixin):

        # We need to correctly dispatch the model on the current process device. The easiest way for this is to use a simple
        # `device_map` pointing to the correct device
-       if device_mesh is None:
-           tp_plan, device_map, device_mesh = initialize_tensor_parallelism(tp_plan, tp_size=None)
-       else:
-           # TODO: make device_mesh support multiple dimensions
-           if device_mesh.ndim == 1:
-               raise ValueError("device_mesh must be 1 dimensional and will be used for TP")
-           device_map = torch.device(device_mesh.device_type, int(os.environ["LOCAL_RANK"]))
+       if tp_plan is not None:
+           if device_mesh is None and tp_plan is not None:
+               tp_plan, device_map, device_mesh = initialize_tensor_parallelism(tp_plan, tp_size=None)
+           else:
+               # TODO: make device_mesh support multiple dimensions
+               if device_mesh.ndim == 1:
+                   raise ValueError("device_mesh must be 1 dimensional and will be used for TP")
+               device_map = torch.device(device_mesh.device_type, int(os.environ["LOCAL_RANK"]))

        if use_auth_token is not None:
            warnings.warn(
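As a hedged aside (not part of the diff), the branch above is only entered on the tensor-parallel loading path. A minimal usage sketch, assuming a multi-process launch (for example via `torchrun`) so that `LOCAL_RANK` is set, and a checkpoint whose architecture ships a TP plan; the model name is illustrative.

```python
import torch
from transformers import AutoModelForCausalLM

# tp_plan="auto" with no explicit device_mesh takes the first branch above and lets
# initialize_tensor_parallelism build the mesh from the launcher's world size.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",  # illustrative; any TP-enabled checkpoint works
    tp_plan="auto",
    torch_dtype=torch.bfloat16,
)
```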
@@ -103,6 +103,7 @@ if TYPE_CHECKING:

    from .ernie import *
    from .esm import *
    from .falcon import *
+   from .falcon_h1 import *
    from .falcon_mamba import *
    from .fastspeech2_conformer import *
    from .flaubert import *
@@ -134,31 +134,6 @@ class AlbertTokenizerFast(PreTrainedTokenizerFast):

    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. An ALBERT sequence has the following format:
-
-       - single sequence: `[CLS] X [SEP]`
-       - pair of sequences: `[CLS] A [SEP] B [SEP]`
-
-       Args:
-           token_ids_0 (`List[int]`):
-               List of IDs to which the special tokens will be added
-           token_ids_1 (`List[int]`, *optional*):
-               Optional second list of IDs for sequence pairs.
-
-       Returns:
-           `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
-       """
-       sep = [self.sep_token_id]
-       cls = [self.cls_token_id]
-       if token_ids_1 is None:
-           return cls + token_ids_0 + sep
-       return cls + token_ids_0 + sep + token_ids_1 + sep

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
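Not part of the diff, but useful context for this and the similar hunks below: for fast (Rust-backed) tokenizers, special tokens are inserted by the backend's post-processor on the regular encode path, so these Python-side `build_inputs_with_special_tokens` overrides are redundant there (the `TODO ArthurZ let's rely on the template processor` comment in the Cohere hunk below points the same way). A small sketch with an assumed checkpoint:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # loads the fast tokenizer

# The Rust post-processor adds [CLS]/[SEP] during encoding, without going through
# the removed Python-side build_inputs_with_special_tokens override.
print(tok("hello", "world")["input_ids"])
# e.g. [101, 7592, 102, 2088, 102]  ->  [CLS] hello [SEP] world [SEP]
```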
@@ -118,6 +118,7 @@ CONFIG_MAPPING_NAMES = OrderedDict(

        ("ernie_m", "ErnieMConfig"),
        ("esm", "EsmConfig"),
        ("falcon", "FalconConfig"),
+       ("falcon_h1", "FalconH1Config"),
        ("falcon_mamba", "FalconMambaConfig"),
        ("fastspeech2_conformer", "FastSpeech2ConformerConfig"),
        ("flaubert", "FlaubertConfig"),

@@ -481,6 +482,7 @@ MODEL_NAMES_MAPPING = OrderedDict(

        ("esm", "ESM"),
        ("falcon", "Falcon"),
        ("falcon3", "Falcon3"),
+       ("falcon_h1", "FalconH1"),
        ("falcon_mamba", "FalconMamba"),
        ("fastspeech2_conformer", "FastSpeech2Conformer"),
        ("flan-t5", "FLAN-T5"),

@@ -115,6 +115,7 @@ MODEL_MAPPING_NAMES = OrderedDict(

        ("ernie_m", "ErnieMModel"),
        ("esm", "EsmModel"),
        ("falcon", "FalconModel"),
+       ("falcon_h1", "FalconH1Model"),
        ("falcon_mamba", "FalconMambaModel"),
        ("fastspeech2_conformer", "FastSpeech2ConformerModel"),
        ("flaubert", "FlaubertModel"),

@@ -558,6 +559,7 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(

        ("emu3", "Emu3ForCausalLM"),
        ("ernie", "ErnieForCausalLM"),
        ("falcon", "FalconForCausalLM"),
+       ("falcon_h1", "FalconH1ForCausalLM"),
        ("falcon_mamba", "FalconMambaForCausalLM"),
        ("fuyu", "FuyuForCausalLM"),
        ("gemma", "GemmaForCausalLM"),
@@ -237,13 +237,6 @@ class BartTokenizerFast(PreTrainedTokenizerFast):

        files = self._tokenizer.model.save(save_directory, name=filename_prefix)
        return tuple(files)

-   def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
-       output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
-       if token_ids_1 is None:
-           return output
-       return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:

@@ -126,32 +126,6 @@ class BarthezTokenizerFast(PreTrainedTokenizerFast):

    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. A BARThez sequence has the following format:
-
-       - single sequence: `<s> X </s>`
-       - pair of sequences: `<s> A </s></s> B </s>`
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       if token_ids_1 is None:
-           return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
-       cls = [self.cls_token_id]
-       sep = [self.sep_token_id]
-       return cls + token_ids_0 + sep + sep + token_ids_1 + sep

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:

@@ -114,30 +114,6 @@ class BertTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

-   def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. A BERT sequence has the following format:
-
-       - single sequence: `[CLS] X [SEP]`
-       - pair of sequences: `[CLS] A [SEP] B [SEP]`
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
-       if token_ids_1 is not None:
-           output += token_ids_1 + [self.sep_token_id]
-       return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:

@@ -123,31 +123,6 @@ class BigBirdTokenizerFast(PreTrainedTokenizerFast):

    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. An BigBird sequence has the following format:
-
-       - single sequence: `[CLS] X [SEP]`
-       - pair of sequences: `[CLS] A [SEP] B [SEP]`
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       sep = [self.sep_token_id]
-       cls = [self.cls_token_id]
-       if token_ids_1 is None:
-           return cls + token_ids_0 + sep
-       return cls + token_ids_0 + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:

@@ -264,21 +264,5 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):

            return len(cls + token_ids_0 + sep) * [0]
        return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]

-   def build_inputs_with_special_tokens(self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None):
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. A Blenderbot sequence has the following format:
-       - single sequence: ` X </s>`
-
-       Args:
-           token_ids_0 (`List[int]`):
-               List of IDs to which the special tokens will be added
-           token_ids_1 (`List[int]`, *optional*):
-               Will be ignored
-       Returns:
-           `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
-       """
-       return token_ids_0 + [self.eos_token_id]

__all__ = ["BlenderbotTokenizerFast"]

@@ -69,13 +69,6 @@ class BlenderbotSmallTokenizerFast(PreTrainedTokenizerFast):

        )
        self.add_prefix_space = add_prefix_space

-   def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
-       output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
-       if token_ids_1 is None:
-           return output
-       return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -129,32 +129,6 @@ class CamembertTokenizerFast(PreTrainedTokenizerFast):

    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. An CamemBERT sequence has the following format:
-
-       - single sequence: `<s> X </s>`
-       - pair of sequences: `<s> A </s></s> B </s>`
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       if token_ids_1 is None:
-           return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
-       cls = [self.cls_token_id]
-       sep = [self.sep_token_id]
-       return cls + token_ids_0 + sep + sep + token_ids_1 + sep

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:

@@ -106,33 +106,6 @@ class CLIPTokenizerFast(PreTrainedTokenizerFast):

        self.backend_tokenizer.decode = new_decode_method

-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. A CLIP sequence has the following format:
-
-       - single sequence: `<|startoftext|> X <|endoftext|>`
-
-       Pairs of sequences are not the expected use case, but they will be handled without a separator.
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       bos_token = [self.bos_token_id]
-       eos_token = [self.eos_token_id]
-       if token_ids_1 is None:
-           return bos_token + token_ids_0 + eos_token
-       return bos_token + token_ids_0 + eos_token + eos_token + token_ids_1 + eos_token

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:

@@ -346,33 +346,5 @@ class CodeLlamaTokenizerFast(PreTrainedTokenizerFast):

        return (out_vocab_file,)

-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. The special tokens depend on calling set_lang.
-
-       An NLLB sequence has the following format, where `X` represents the sequence:
-
-       - `input_ids` (for encoder) `X [eos, src_lang_code]`
-       - `decoder_input_ids`: (for decoder) `X [eos, tgt_lang_code]`
-
-       BOS is never used. Pairs of sequences are not the expected use case, but they will be handled without a
-       separator.
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       if token_ids_1 is None:
-           return self.bos_token_id + token_ids_0 + self.eos_token_id
-       return self.bos_token_id + token_ids_0 + token_ids_1 + self.eos_token_id

__all__ = ["CodeLlamaTokenizerFast"]
@@ -496,17 +496,5 @@ class CohereTokenizerFast(PreTrainedTokenizerFast):

            **kwargs,
        )

-   # TODO ArthurZ let's rely on the template processor instead, refactor all fast tokenizers
-   def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
-       bos_token_id = [self.bos_token_id] if self.add_bos_token else []
-       eos_token_id = [self.eos_token_id] if self.add_eos_token else []
-       output = bos_token_id + token_ids_0 + eos_token_id
-       if token_ids_1 is not None:
-           output = output + bos_token_id + token_ids_1 + eos_token_id
-       return output

__all__ = ["CohereTokenizerFast"]

@@ -115,30 +115,6 @@ class ConvBertTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

-   def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. A ConvBERT sequence has the following format:
-
-       - single sequence: `[CLS] X [SEP]`
-       - pair of sequences: `[CLS] A [SEP] B [SEP]`
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
-       if token_ids_1 is not None:
-           output += token_ids_1 + [self.sep_token_id]
-       return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:

@@ -148,32 +148,6 @@ class CpmTokenizerFast(PreTrainedTokenizerFast):

    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

-   # Copied from transformers.models.xlnet.tokenization_xlnet_fast.XLNetTokenizerFast.build_inputs_with_special_tokens
-   def build_inputs_with_special_tokens(
-       self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
-   ) -> List[int]:
-       """
-       Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-       adding special tokens. An XLNet sequence has the following format:
-
-       - single sequence: `X <sep> <cls>`
-       - pair of sequences: `A <sep> B <sep> <cls>`
-
-       (standard Args/Returns docstring, as in the ALBERT hunk above)
-       """
-       sep = [self.sep_token_id]
-       cls = [self.cls_token_id]
-       if token_ids_1 is None:
-           return token_ids_0 + sep + cls
-       return token_ids_0 + sep + token_ids_1 + sep + cls

    # Copied from transformers.models.xlnet.tokenization_xlnet_fast.XLNetTokenizerFast.create_token_type_ids_from_sequences
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
@@ -154,31 +154,6 @@ class DebertaTokenizerFast(PreTrainedTokenizerFast):
        value = AddedToken(value, lstrip=True, rstrip=False) if isinstance(value, str) else value
        self._mask_token = value

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A DeBERTa sequence has the following format:

        - single sequence: [CLS] X [SEP]
        - pair of sequences: [CLS] A [SEP] B [SEP]

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + token_ids_1 + sep

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -123,30 +123,6 @@ class DebertaV2TokenizerFast(PreTrainedTokenizerFast):
    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A DeBERTa sequence has the following format:

        - single sequence: [CLS] X [SEP]
        - pair of sequences: [CLS] A [SEP] B [SEP]

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """

        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + token_ids_1 + sep

    def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False):
        """
        Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
@@ -191,30 +191,6 @@ class RealmTokenizerFast(PreTrainedTokenizerFast):

        return BatchEncoding(output_data, tensor_type=return_tensors)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A REALM sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -118,30 +118,6 @@ class RetriBertTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A BERT sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -115,31 +115,6 @@ class DistilBertTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

    # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A BERT sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.create_token_type_ids_from_sequences
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
@@ -111,30 +111,6 @@ class ElectraTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. An ELECTRA sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
27
src/transformers/models/falcon_h1/__init__.py
Normal file
@@ -0,0 +1,27 @@
# Copyright 2025 TII and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure


if TYPE_CHECKING:
    from .configuration_falcon_h1 import *
    from .modeling_falcon_h1 import *
else:
    import sys

    _file = globals()["__file__"]
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
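This `__init__.py` follows the repository's lazy-import pattern: importing the package is cheap, and the heavy configuration/modeling modules are only loaded when one of their attributes is first accessed. A hedged usage sketch (it assumes a build of transformers that includes this PR):

# Resolved lazily through _LazyModule; nothing heavy is imported until the
# attribute is accessed. The import path is taken from the new package above.
from transformers.models.falcon_h1 import FalconH1Config

config = FalconH1Config()
print(config.model_type)  # "falcon_h1"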
283
src/transformers/models/falcon_h1/configuration_falcon_h1.py
Normal file
@@ -0,0 +1,283 @@
# coding=utf-8
# Copyright 2025 TII and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""FalconH1 model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class FalconH1Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`FalconH1Model`]. It is used to instantiate a
    FalconH1 model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with defaults taken from [ibm-fms/FalconH1-9.8b-2.2T-hf](https://huggingface.co/ibm-fms/FalconH1-9.8b-2.2T-hf).
    The FalconH1Model is a hybrid [mamba2](https://github.com/state-spaces/mamba) architecture with SwiGLU.
    The checkpoints are trained by TII (Technology Innovation Institute).
    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.
    Args:
        vocab_size (`int`, *optional*, defaults to 128000):
            Vocabulary size of the FalconH1 model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`FalconH1Model`]
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether the model's input and output word embeddings should be tied. Note that this is only relevant if
            the model has an output word embedding layer.
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 14336):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        num_key_value_heads (`int`, *optional*, defaults to 8):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be
            constructed by meanpooling all the original heads within that group. For more details check out [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        num_logits_to_keep (`int` or `None`, *optional*, defaults to 1):
            Number of prompt logits to calculate during generation. If `None`, all logits will be calculated. If an
            integer value, only the last `num_logits_to_keep` logits will be calculated. Default is 1 because only
            the logits of the last prompt token are needed for generation. For long sequences, the logits for the
            entire sequence may use a lot of memory, so setting `num_logits_to_keep=1` will reduce the memory
            footprint significantly.
        pad_token_id (`int`, *optional*, defaults to 0):
            The id of the padding token.
        bos_token_id (`int`, *optional*, defaults to 1):
            The id of the "beginning-of-sequence" token.
        eos_token_id (`int`, *optional*, defaults to 2):
            The id of the "end-of-sequence" token.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            Max cached sequence length for the model.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        mamba_d_ssm (`int`, *optional*, defaults to 1024):
            The dimension of the SSM state space latents.
        mamba_n_heads (`int`, *optional*, defaults to 128):
            The number of mamba heads used in the v2 implementation.
        mamba_d_head (`int`, *optional*, defaults to `"auto"`):
            Head embedding dimension size.
        mamba_n_groups (`int`, *optional*, defaults to 1):
            The number of the mamba groups used in the v2 implementation.
        mamba_d_state (`int`, *optional*, defaults to 256):
            The dimension of the mamba state space latents.
        mamba_d_conv (`int`, *optional*, defaults to 4):
            The size of the mamba convolution kernel.
        mamba_expand (`int`, *optional*, defaults to 2):
            Expanding factor (relative to hidden_size) used to determine the mamba intermediate size.
        mamba_chunk_size (`int`, *optional*, defaults to 256):
            The chunks in which to break the sequence when doing prefill/training.
        mamba_conv_bias (`bool`, *optional*, defaults to `True`):
            Flag indicating whether or not to use bias in the convolution layer of the mamba mixer block.
        mamba_proj_bias (`bool`, *optional*, defaults to `False`):
            Flag indicating whether or not to use bias in the input and output projections (["in_proj", "out_proj"])
            of the mamba mixer block.
        mamba_norm_before_gate (`bool`, *optional*, defaults to `True`):
            Whether to use RMSNorm before the gate in the Mamba block.
        mamba_rms_norm (`bool`, *optional*, defaults to `False`):
            Whether to use RMSNorm instead of LayerNorm in the Mamba block.
        projectors_bias (`bool`, *optional*, defaults to `False`):
            Flag indicating whether or not to use bias in the input and output projections (["in_proj", "out_proj"])
            of the attention block.
        rope_theta (`float`, *optional*, defaults to 100000.0):
            The theta value used for the RoPE embeddings.
        rope_scaling (`float`, *optional*):
            The scaling value used for the RoPE embeddings. If `None`, no scaling is applied.
        lm_head_multiplier (`float`, *optional*, defaults to 1.0):
            The multiplier for the LM head. This is used to scale the output of the LM head.
        embedding_multiplier (`float`, *optional*, defaults to 1.0):
            The multiplier for the embedding layer. This is used to scale the output of the embedding layer.
        mlp_multipliers (`List[float]`, *optional*):
            The multipliers for the MLP layers. This is used to scale the output of the MLP layers. The first value
            is the multiplier of the gate layer, the second value is the multiplier of the down_proj layer.
        key_multiplier (`float`, *optional*):
            The multiplier for the key layer. This is used to scale the output of the key layer.
        attention_out_multiplier (`float`, *optional*):
            The multiplier for the attention output layer. This is used to scale the output of the attention output
            layer.
        attention_in_multiplier (`float`, *optional*):
            The multiplier for the attention input layer. This is used to scale the output of the attention input
            layer.
        ssm_multipliers (`List[float]`, *optional*):
            The multipliers for the SSM layers. This is used to scale the output of the SSM layers.
        ssm_in_multiplier (`float`, *optional*):
            The multiplier for the SSM input layer. This is used to scale the output of the SSM input layer.
        ssm_out_multiplier (`float`, *optional*):
            The multiplier for the SSM output layer. This is used to scale the output of the SSM output layer.
    """

    model_type = "falcon_h1"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=128000,
        tie_word_embeddings=False,
        hidden_size=4096,
        intermediate_size=14336,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=8,
        hidden_act="silu",
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        num_logits_to_keep=1,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        max_position_embeddings=8192,
        attention_dropout=0.0,
        mamba_d_ssm=1024,
        mamba_n_heads=128,
        mamba_d_head="auto",
        mamba_n_groups=1,
        mamba_d_state=256,
        mamba_d_conv=4,
        mamba_expand=2,
        mamba_chunk_size=256,
        mamba_conv_bias=True,
        mamba_proj_bias=False,
        mamba_norm_before_gate=True,
        mamba_rms_norm=False,
        projectors_bias=False,
        rope_theta=100000.0,
        rope_scaling=None,
        lm_head_multiplier=1.0,
        embedding_multiplier=1.0,
        mlp_multipliers=None,
        key_multiplier=None,
        attention_out_multiplier=None,
        attention_in_multiplier=None,
        ssm_multipliers=None,
        ssm_in_multiplier=None,
        ssm_out_multiplier=None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.max_position_embeddings = max_position_embeddings
        self.attention_dropout = attention_dropout
        self.attention_bias = False
        self.mlp_bias = False

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps

        self.use_cache = use_cache
        self.num_logits_to_keep = num_logits_to_keep

        self.rope_theta = rope_theta
        self.rope_scaling = None
        self.rope_scaling = rope_scaling
        self.projectors_bias = projectors_bias
        mamba_intermediate = mamba_expand * hidden_size if mamba_d_ssm is None else mamba_d_ssm

        if mamba_intermediate % mamba_n_heads != 0:
            raise ValueError("mamba_n_heads must divide mamba_expand * hidden_size")

        # for the mamba_v2, must satisfy the following
        if mamba_d_head == "auto":
            mamba_d_head = mamba_intermediate // mamba_n_heads

        if mamba_d_head * mamba_n_heads != mamba_intermediate:
            raise ValueError("The dimensions for the Mamba head state do not match the model intermediate_size")

        self.mamba_d_ssm = mamba_d_ssm
        self.mamba_n_heads = mamba_n_heads
        self.mamba_d_head = mamba_d_head
        self.mamba_n_groups = mamba_n_groups
        self.mamba_d_state = mamba_d_state
        self.mamba_d_conv = mamba_d_conv
        self.mamba_expand = mamba_expand
        self.mamba_chunk_size = mamba_chunk_size
        self.mamba_conv_bias = mamba_conv_bias
        self.mamba_proj_bias = mamba_proj_bias

        self.mamba_norm_before_gate = mamba_norm_before_gate
        self.mamba_rms_norm = mamba_rms_norm

        self.lm_head_multiplier = lm_head_multiplier
        self.embedding_multiplier = embedding_multiplier

        if mlp_multipliers is not None:
            self.mlp_multipliers = mlp_multipliers
        else:
            self.mlp_multipliers = [1.0, 1.0]

        if attention_out_multiplier is not None:
            self.attention_out_multiplier = attention_out_multiplier
        else:
            self.attention_out_multiplier = 1.0

        if attention_in_multiplier is not None:
            self.attention_in_multiplier = attention_in_multiplier
        else:
            self.attention_in_multiplier = 1.0

        if key_multiplier is not None:
            self.key_multiplier = key_multiplier
        else:
            self.key_multiplier = 1.0

        if ssm_multipliers is not None:
            self.ssm_multipliers = ssm_multipliers
        else:
            self.ssm_multipliers = [1.0, 1.0, 1.0, 1.0, 1.0]

        if ssm_in_multiplier is not None:
            self.ssm_in_multiplier = ssm_in_multiplier
        else:
            self.ssm_in_multiplier = 1.0

        if ssm_out_multiplier is not None:
            self.ssm_out_multiplier = ssm_out_multiplier
        else:
            self.ssm_out_multiplier = 1.0

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    @property
    def layers_block_type(self):
        return ["attention" for i in range(self.num_hidden_layers)]


__all__ = ["FalconH1Config"]
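A short, hedged usage sketch of the validation logic above: with the defaults, `mamba_d_ssm=1024` and `mamba_n_heads=128`, so `mamba_d_head` resolves to 8, while an indivisible combination raises the `ValueError` shown in `__init__`. It assumes the configuration file from this PR is importable.

from transformers.models.falcon_h1.configuration_falcon_h1 import FalconH1Config

# Defaults: mamba_d_ssm=1024, mamba_n_heads=128 -> mamba_d_head = 1024 // 128 = 8
config = FalconH1Config()
assert config.mamba_d_head == 8

# An SSM width that mamba_n_heads does not divide is rejected at construction time.
try:
    FalconH1Config(mamba_d_ssm=1000, mamba_n_heads=128)
except ValueError as err:
    print(err)  # "mamba_n_heads must divide mamba_expand * hidden_size"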
@@ -0,0 +1,151 @@
# coding=utf-8
# Copyright 2025 TII and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This script can be used to convert checkpoints provided in the `mamba_ssm` library into the format provided in HuggingFace `transformers`. It depends on the `mamba2_ssm` package to be installed."""

import argparse

import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, FalconH1Config, FalconH1ForCausalLM


CONVERSION_MAPPING = {
    "backbone": "model",
    "embeddings": "embed_tokens",
    "mixer.": "",
    "mixer_ssm": "mamba",
    "mixer_attn": "self_attn",
    "mlp.": "feed_forward.",
    "mlp_norm": "pre_ff_layernorm",
    "ssm_proj": "mamba.in_proj",
    "attn_out_proj": "o_proj",
    ".norm.": ".input_layernorm.",
    ".mamba.input_layernorm.": ".mamba.norm.",
    ".ssm_out_proj.": ".mamba.out_proj.",
    "norm_f": "final_layernorm",
}


def convert_falcon_h1_to_hf(input_model_path, output_path):
    tokenizer = AutoTokenizer.from_pretrained(input_model_path)

    model = AutoModelForCausalLM.from_pretrained(
        input_model_path, torch_dtype=torch.bfloat16, trust_remote_code=True, low_cpu_mem_usage=True
    )

    intermediate_size = int(model.config.expansion_factor * model.config.hidden_size)

    if intermediate_size % 2 != 0:
        intermediate_size = intermediate_size + (intermediate_size % 2)

    new_config = FalconH1Config(
        vocab_size=model.config.vocab_size,
        tie_word_embeddings=model.config.tie_word_embeddings,
        hidden_size=model.config.hidden_size,
        intermediate_size=intermediate_size,
        mamba_d_state=model.config.state_size,
        num_hidden_layers=model.config.num_hidden_layers,
        mamba_use_mlp=model.config.use_mlp,
        rms_norm_eps=model.config.layer_norm_epsilon,
        pad_token_id=model.config.pad_token_id,
        eos_token_id=model.config.eos_token_id,
        mamba_expand=model.config.expand,
        mamba_d_conv=model.config.conv_kernel,
        mamba_n_groups=model.config.n_groups,
        mamba_n_heads=model.config.num_heads,
        mamba_norm_before_gate=model.config.norm_before_gate,
        mamba_rms_norm=model.config.rms_norm,
        mamba_d_ssm=model.config.d_ssm,
        attention_bias=model.config.use_bias,
        projectors_bias=model.config.use_bias,
        mamba_conv_bias=model.config.use_conv_bias,
        hidden_act=model.config.hidden_act,
        use_cache=model.config.use_cache,
        mamba_chunk_size=model.config.chunk_size,
        num_attention_heads=model.config.num_heads_mha,
        num_key_value_heads=model.config.num_key_value_heads,
        head_dim=model.config.head_dim_mha,
        lm_head_multiplier=model.config.lm_head_multiplier,
        embedding_multiplier=model.config.embedding_multiplier,
        mlp_multipliers=model.config.mlp_multipliers,
        key_multiplier=model.config.key_multiplier,
        attention_out_multiplier=model.config.attention_out_multiplier,
        attention_in_multiplier=model.config.attention_in_multiplier,
        ssm_multipliers=model.config.ssm_multipliers,
        ssm_in_multiplier=model.config.ssm_in_multiplier,
        ssm_out_multiplier=model.config.ssm_out_multiplier,
        rope_theta=model.config.rope_theta,
    )

    old_state_dict = model.state_dict()
    new_state_dict = {}

    for old_key, old_value in old_state_dict.items():
        new_key = old_key
        for conversion_key, conversion_value in CONVERSION_MAPPING.items():
            if conversion_key in old_key:
                new_key = new_key.replace(conversion_key, conversion_value)

        if "mamba.input_layernorm" in new_key:
            new_key = new_key.replace("mamba.input_layernorm", "mamba.norm")

        # Special processing for attention layers
        if "self_attn.attn_proj" in new_key:
            num_heads = new_config.num_attention_heads
            num_kv_heads = new_config.num_key_value_heads
            head_dim = new_config.head_dim
            q_proj, k_proj, v_proj = old_value.split(
                [
                    num_heads * head_dim,
                    num_kv_heads * head_dim,
                    num_kv_heads * head_dim,
                ],
                dim=0,
            )
            new_state_dict[new_key.replace("attn_proj", "q_proj")] = q_proj
            new_state_dict[new_key.replace("attn_proj", "k_proj")] = k_proj
            new_state_dict[new_key.replace("attn_proj", "v_proj")] = v_proj
        else:
            new_state_dict[new_key] = old_value

    with torch.device("meta"):
        new_model = FalconH1ForCausalLM(new_config)

    del model

    new_model.load_state_dict(new_state_dict, strict=True, assign=True)

    new_model.save_pretrained(output_path)
    tokenizer.save_pretrained(output_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--mamba_ssm_checkpoint_directory",
        type=str,
        required=True,
        help="Path to a directory containing the `pytorch_model.bin` mamba_ssm checkpoint file to be converted.",
    )
    parser.add_argument(
        "-o", "--output_dir", type=str, required=True, help="Path to directory to save the converted output model to."
    )
    args = parser.parse_args()

    convert_falcon_h1_to_hf(
        args.mamba_ssm_checkpoint_directory,
        args.output_dir,
    )
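A hedged usage note for the conversion script above (the module name is assumed from this diff and may differ in the merged PR): it can be run as `python convert_falcon_h1_to_hf.py -i <mamba_ssm_checkpoint_dir> -o <output_dir>`, or the entry point can be called directly:

# Sketch: calling the conversion entry point directly from Python.
# The module name is assumed from this diff; paths are illustrative.
from convert_falcon_h1_to_hf import convert_falcon_h1_to_hf

convert_falcon_h1_to_hf(
    "checkpoints/falcon_h1_mamba_ssm",  # directory holding the mamba_ssm-format checkpoint
    "checkpoints/falcon_h1_hf",         # where the converted transformers model is saved
)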
1692
src/transformers/models/falcon_h1/modeling_falcon_h1.py
Normal file
File diff suppressed because it is too large
1442
src/transformers/models/falcon_h1/modular_falcon_h1.py
Normal file
File diff suppressed because it is too large
@@ -117,31 +117,6 @@ class FNetTokenizerFast(PreTrainedTokenizerFast):
    def can_save_slow_tokenizer(self) -> bool:
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. An FNet sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: list of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        sep = [self.sep_token_id]
        cls = [self.cls_token_id]
        if token_ids_1 is None:
            return cls + token_ids_0 + sep
        return cls + token_ids_0 + sep + token_ids_1 + sep

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -140,31 +140,6 @@ class FunnelTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

    # Copied from transformers.models.bert.tokenization_bert_fast.BertTokenizerFast.build_inputs_with_special_tokens with BERT->Funnel
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A Funnel sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -183,17 +183,5 @@ class GemmaTokenizerFast(PreTrainedTokenizerFast):

        return (out_vocab_file,)

    # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output


__all__ = ["GemmaTokenizerFast"]
@@ -204,18 +204,6 @@ class GPTNeoXTokenizerFast(PreTrainedTokenizerFast):
            + eos_token_id
        )

    # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        files = self._tokenizer.model.save(save_directory, name=filename_prefix)
        return tuple(files)
@@ -71,33 +71,6 @@ class HerbertTokenizerFast(PreTrainedTokenizerFast):
            **kwargs,
        )

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A HerBERT sequence, like a BERT sequence, has the following format:

        - single sequence: `<s> X </s>`
        - pair of sequences: `<s> A </s> B </s>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """

        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        if token_ids_1 is None:
            return cls + token_ids_0 + sep

        return cls + token_ids_0 + sep + token_ids_1 + sep

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
@@ -115,30 +115,6 @@ class LayoutLMTokenizerFast(PreTrainedTokenizerFast):

        self.do_lower_case = do_lower_case

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A LayoutLM sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1 is not None:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -757,30 +757,6 @@ class LayoutLMv2TokenizerFast(PreTrainedTokenizerFast):

        return encoded_inputs

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. A BERT sequence has the following format:

        - single sequence: `[CLS] X [SEP]`
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        output = [self.cls_token_id] + token_ids_0 + [self.sep_token_id]

        if token_ids_1:
            output += token_ids_1 + [self.sep_token_id]

        return output

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
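One detail worth noting in the LayoutLMv2 hunk: unlike the other tokenizers above, it guards the pair branch with `if token_ids_1:` rather than `if token_ids_1 is not None:`, so an explicitly passed empty second sequence is treated like `None`. A small standalone sketch of the difference (illustrative ids, not library code):

def with_truthiness_check(token_ids_0, token_ids_1=None, cls=101, sep=102):
    output = [cls] + token_ids_0 + [sep]
    if token_ids_1:  # an empty list is skipped
        output += token_ids_1 + [sep]
    return output

def with_none_check(token_ids_0, token_ids_1=None, cls=101, sep=102):
    output = [cls] + token_ids_0 + [sep]
    if token_ids_1 is not None:  # an empty list still appends a trailing [SEP]
        output += token_ids_1 + [sep]
    return output

assert with_truthiness_check([7], []) == [101, 7, 102]
assert with_none_check([7], []) == [101, 7, 102, 102]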
@@ -816,13 +816,6 @@ class LayoutLMv3TokenizerFast(PreTrainedTokenizerFast):
        files = self._tokenizer.model.save(save_directory, name=filename_prefix)
        return tuple(files)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
        if token_ids_1 is None:
            return output

        return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -745,32 +745,6 @@ class LayoutXLMTokenizerFast(PreTrainedTokenizerFast):

        return encoded_inputs

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
        and adding special tokens. An XLM-RoBERTa sequence has the following format:

        - single sequence: `<s> X </s>`
        - pair of sequences: `<s> A </s></s> B </s>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """

        if token_ids_1 is None:
            return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
        cls = [self.cls_token_id]
        sep = [self.sep_token_id]
        return cls + token_ids_0 + sep + sep + token_ids_1 + sep

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
@@ -241,14 +241,6 @@ class LEDTokenizerFast(PreTrainedTokenizerFast):
        files = self._tokenizer.model.save(save_directory, name=filename_prefix)
        return tuple(files)

    # Copied from transformers.models.bart.tokenization_bart_fast.BartTokenizerFast.build_inputs_with_special_tokens
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        output = [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
        if token_ids_1 is None:
            return output

        return output + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

    # Copied from transformers.models.bart.tokenization_bart_fast.BartTokenizerFast.create_token_type_ids_from_sequences with BART->LED
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
Some files were not shown because too many files have changed in this diff.