Compare commits

..

7 Commits

Author SHA1 Message Date
f3ff530a45 [run-slow] pixtral 2024-12-05 18:45:21 +00:00
6769700a16 Correct nesting in test 2024-12-05 18:41:21 +00:00
af9f67c9d9 Correct nesting in test 2024-12-05 18:40:35 +00:00
3406432db3 More error handling 2024-12-05 18:36:06 +00:00
031fdd5e10 make fixup 2024-12-05 18:32:42 +00:00
ed0b4303e3 Fix the structure of images output by the processor 2024-12-05 18:31:08 +00:00
49055e150d Fix the structure of images output by the processor 2024-12-05 18:19:10 +00:00
2292 changed files with 52267 additions and 127076 deletions

View File

@ -13,7 +13,6 @@ jobs:
check_circleci_user:
docker:
- image: python:3.10-slim
resource_class: small
parallelism: 1
steps:
- run: echo $CIRCLE_PROJECT_USERNAME
@ -58,15 +57,15 @@ jobs:
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
python utils/process_test_artifacts.py
# To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
# We used:
# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:
@ -110,7 +109,7 @@ jobs:
- run:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
python utils/process_test_artifacts.py
# To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
@ -170,7 +169,7 @@ jobs:
- store_artifacts:
path: ~/transformers/installed.txt
- run: python utils/check_copies.py
- run: python utils/check_modular_conversion.py --num_workers 4
- run: python utils/check_modular_conversion.py
- run: python utils/check_table.py
- run: python utils/check_dummies.py
- run: python utils/check_repo.py

View File

@ -32,7 +32,7 @@ COMMON_ENV_VARIABLES = {
"RUN_PT_FLAX_CROSS_TESTS": False,
}
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None}
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
@ -40,23 +40,9 @@ class EmptyJob:
job_name = "empty"
def to_dict(self):
steps = [{"run": 'ls -la'}]
if self.job_name == "collection_job":
steps.extend(
[
"checkout",
{"run": "pip install requests || true"},
{"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
{"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
{"store_artifacts": {"path": "outputs"}},
{"run": 'echo "All required jobs have now completed"'},
]
)
return {
"docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
"resource_class": "small",
"steps": steps,
"steps":["checkout"],
}
@ -68,9 +54,9 @@ class CircleCIJob:
install_steps: List[str] = None
marker: Optional[str] = None
parallelism: Optional[int] = 0
pytest_num_workers: int = 8
pytest_num_workers: int = 12
pytest_options: Dict[str, Any] = None
resource_class: Optional[str] = "xlarge"
resource_class: Optional[str] = "2xlarge"
tests_to_run: Optional[List[str]] = None
num_test_files_per_worker: Optional[int] = 10
# This should be only used for doctest job!
@ -147,7 +133,7 @@ class CircleCIJob:
"command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
},
{"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Split tests across parallel nodes: show current parallel tests",
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
}
@ -199,6 +185,7 @@ torch_job = CircleCIJob(
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
pytest_num_workers=8
)
generate_job = CircleCIJob(
@ -206,24 +193,28 @@ generate_job = CircleCIJob(
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="generate",
parallelism=6,
pytest_num_workers=8
)
tokenization_job = CircleCIJob(
"tokenization",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
pytest_num_workers=16
)
processor_job = CircleCIJob(
"processors",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
pytest_num_workers=6
)
tf_job = CircleCIJob(
"tf",
docker_image=[{"image":"huggingface/transformers-tf-light"}],
parallelism=6,
pytest_num_workers=16,
)
@ -231,8 +222,7 @@ flax_job = CircleCIJob(
"flax",
docker_image=[{"image":"huggingface/transformers-jax-light"}],
parallelism=6,
pytest_num_workers=16,
resource_class="2xlarge",
pytest_num_workers=16
)
@ -241,7 +231,7 @@ pipelines_torch_job = CircleCIJob(
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
marker="is_pipeline_test",
parallelism=4,
parallelism=4
)
@ -250,7 +240,7 @@ pipelines_tf_job = CircleCIJob(
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-tf-light"}],
marker="is_pipeline_test",
parallelism=4,
parallelism=4
)
@ -267,6 +257,7 @@ examples_torch_job = CircleCIJob(
docker_image=[{"image":"huggingface/transformers-examples-torch"}],
# TODO @ArthurZucker remove this once docker is easier to build
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
pytest_num_workers=8,
)
@ -274,6 +265,7 @@ examples_tensorflow_job = CircleCIJob(
"examples_tensorflow",
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-tf"}],
pytest_num_workers=16,
)
@ -288,7 +280,6 @@ hub_job = CircleCIJob(
],
marker="is_staging_test",
pytest_num_workers=2,
resource_class="medium",
)
@ -301,13 +292,13 @@ onnx_job = CircleCIJob(
],
pytest_options={"k onnx": None},
pytest_num_workers=1,
resource_class="small",
)
exotic_models_job = CircleCIJob(
"exotic_models",
docker_image=[{"image":"huggingface/transformers-exotic-models"}],
pytest_num_workers=12,
parallelism=4,
pytest_options={"durations": 100},
)
@ -326,6 +317,7 @@ non_model_job = CircleCIJob(
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
pytest_num_workers=8,
)
@ -360,7 +352,6 @@ REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip
def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
@ -370,13 +361,7 @@ def create_circleci_config(folder=None):
if len(jobs) == 0:
jobs = [EmptyJob()]
else:
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
# Add a job waiting all the test jobs and aggregate their test summary files at the end
collection_job = EmptyJob()
collection_job.job_name = "collection_job"
jobs = [collection_job] + jobs
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
config = {
"version": "2.1",
"parameters": {
@ -386,14 +371,9 @@ def create_circleci_config(folder=None):
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
},
"jobs": {j.job_name: j.to_dict() for j in jobs}
"jobs" : {j.job_name: j.to_dict() for j in jobs},
"workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
}
if "CIRCLE_TOKEN" in os.environ:
# For private forked repo. (e.g. new model addition)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
else:
# For public repo. (e.g. `transformers`)
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f:
f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))

View File

@ -106,7 +106,6 @@ body:
label: Reproduction
description: |
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs.
If you have code snippets, error messages, stack traces please provide them here as well.
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.

View File

@ -18,8 +18,7 @@ jobs:
name: Benchmark
strategy:
matrix:
# group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
group: [aws-g5-4xlarge-cache]
group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
runs-on:
group: ${{ matrix.group }}
if: |
@ -64,7 +63,7 @@ jobs:
commit_id=$GITHUB_SHA
fi
commit_msg=$(git show -s --format=%s | cut -c1-70)
python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
# Enable this to see debug logs

View File

@ -3,7 +3,7 @@ name: Build docker images (scheduled)
on:
push:
branches:
- check_doc_image
- build_ci_docker_image*
repository_dispatch:
workflow_call:
inputs:
@ -18,6 +18,132 @@ concurrency:
cancel-in-progress: false
jobs:
latest-docker:
name: "Latest PyTorch + TensorFlow [dev]"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-all-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) in a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-all-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-all-latest-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-torch-deepspeed-docker:
name: "Latest PyTorch + DeepSpeed"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
latest-torch-deepspeed-docker-for-push-ci-daily-build:
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) in a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
doc-builder:
name: "Doc builder"
# Push CI doesn't need this image
@ -50,6 +176,218 @@ jobs:
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the huggingface/transformers-doc-builder docker build
title: 🤗 Results of the huggingface/transformers-doc-builder docker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-pytorch:
name: "Latest PyTorch [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-gpu
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) in a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-amd-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-tensorflow:
name: "Latest TensorFlow [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-tensorflow-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-tensorflow-gpu
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-pytorch-deepspeed-amd:
name: "PyTorch + DeepSpeed (AMD) [dev]"
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
# Push CI images still need to be re-built daily
-
name: Build and push (for Push CI) in a daily basis
# This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
# The later case is useful for manual image building for debugging purpose. Use another tag in this case!
if: inputs.image_postfix != '-push-ci'
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-pytorch-deepspeed-amd-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
latest-quantization-torch-docker:
name: "Latest Pytorch + Quantization [dev]"
# Push CI doesn't need this image
if: inputs.image_postfix != '-push-ci'
runs-on:
group: aws-general-8-plus
steps:
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Check out code
uses: actions/checkout@v4
-
name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: ./docker/transformers-quantization-latest-gpu
build-args: |
REF=main
push: true
tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
title: 🤗 Results of the transformers-quantization-latest-gpu build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

View File

@ -134,3 +134,10 @@ jobs:
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true
benchmark:
name: Benchmark workflow
needs: get_modified_models
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
uses: ./.github/workflows/benchmark.yml
secrets: inherit

View File

@ -1,417 +0,0 @@
name: PR comment GitHub CI
on:
issue_comment:
types:
- created
branches-ignore:
- main
concurrency:
group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
cancel-in-progress: true
permissions: read-all
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
get-pr-number:
runs-on: ubuntu-22.04
name: Get PR number
# For security: only allow team members to run
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps:
- name: Get PR number
shell: bash
run: |
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
else
echo "PR_NUMBER=" >> $GITHUB_ENV
fi
- name: Check PR number
shell: bash
run: |
echo "${{ env.PR_NUMBER }}"
- name: Set PR number
id: set_pr_number
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
get-sha:
runs-on: ubuntu-22.04
needs: get-pr-number
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
outputs:
PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "0"
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
- name: Get SHA (and verify timestamps against the issue comment date)
id: get_sha
env:
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
COMMENT_DATE: ${{ github.event.comment.created_at }}
run: |
git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
git checkout refs/remotes/pull/$PR_NUMBER/head
echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
git checkout refs/remotes/pull/$PR_NUMBER/merge
echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
echo "COMMENT_DATE: $COMMENT_DATE"
echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
exit -1;
fi
# use a python script to handle this complex logic
# case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
# case 2: `run-slow model_1, model_2`
get-tests:
runs-on: ubuntu-22.04
needs: [get-pr-number, get-sha]
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
outputs:
models: ${{ steps.models_to_run.outputs.models }}
quantizations: ${{ steps.models_to_run.outputs.quantizations }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "0"
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
- name: Verify merge commit SHA
env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
run: |
PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1;
fi
- name: Get models to test
env:
PR_COMMENT: ${{ github.event.comment.body }}
run: |
python -m pip install GitPython
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
- name: Show models to test
id: models_to_run
run: |
echo "${{ env.models }}"
echo "models=${{ env.models }}" >> $GITHUB_ENV
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
echo "${{ env.quantizations }}"
echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
reply_to_comment:
name: Reply to the comment
if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-pr-number, get-tests]
permissions:
pull-requests: write
runs-on: ubuntu-22.04
steps:
- name: Reply to the comment
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
MODELS: ${{ needs.get-tests.outputs.models }}
BODY: "This comment contains run-slow, running the specified jobs:\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
create_run:
name: Create run
if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-sha, get-tests, reply_to_comment]
permissions:
statuses: write
runs-on: ubuntu-22.04
steps:
- name: Create Run
id: create_run
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
# See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
run_models_gpu:
name: Run all tests for the model
if: ${{ needs.get-tests.outputs.models != '[]' }}
needs: [get-pr-number, get-sha, get-tests, create_run]
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.get-tests.outputs.models) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ matrix.folders }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Checkout to PR merge commit
working-directory: /transformers
run: |
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git log -1 --format=%H
- name: Verify merge commit SHA
env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
working-directory: /transformers
run: |
PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1;
fi
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
echo $CUDA_VISIBLE_DEVICES
python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
run_quantization_torch_gpu:
name: Run all tests for a quantization
if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
needs: [get-pr-number, get-sha, get-tests, create_run]
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Checkout to PR merge commit
working-directory: /transformers
run: |
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
git log -1 --format=%H
- name: Verify merge commit SHA
env:
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
working-directory: /transformers
run: |
PR_MERGE_SHA=$(git log -1 --format=%H)
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
exit -1;
fi
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
update_run_status:
name: Update Check Run Status
needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
permissions:
statuses: write
if: ${{ always() && needs.create_run.result == 'success' }}
runs-on: ubuntu-22.04
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
steps:
- name: Get `run_models_gpu` job status
run: |
echo "${{ needs.run_models_gpu.result }}"
echo "${{ needs.run_quantization_torch_gpu.result }}"
echo $STATUS_OK
if [ "$STATUS_OK" = "true" ]; then
echo "STATUS=success" >> $GITHUB_ENV
else
echo "STATUS=failure" >> $GITHUB_ENV
fi
- name: Update PR commit statuses
run: |
echo "${{ needs.run_models_gpu.result }}"
echo "${{ env.STATUS }}"
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"

View File

@ -21,6 +21,39 @@ jobs:
echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')"
echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT
run_past_ci_pytorch_1-13:
name: PyTorch 1.13
needs: get_number
if: needs.get_number.outputs.run_number == 0 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
uses: ./.github/workflows/self-past-caller.yml
with:
framework: pytorch
version: "1.13"
sha: ${{ github.sha }}
secrets: inherit
run_past_ci_pytorch_1-12:
name: PyTorch 1.12
needs: get_number
if: needs.get_number.outputs.run_number == 1 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
uses: ./.github/workflows/self-past-caller.yml
with:
framework: pytorch
version: "1.12"
sha: ${{ github.sha }}
secrets: inherit
run_past_ci_pytorch_1-11:
name: PyTorch 1.11
needs: get_number
if: needs.get_number.outputs.run_number == 2 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
uses: ./.github/workflows/self-past-caller.yml
with:
framework: pytorch
version: "1.11"
sha: ${{ github.sha }}
secrets: inherit
run_past_ci_tensorflow_2-11:
name: TensorFlow 2.11
needs: get_number

151
.github/workflows/self-pr-slow-ci.yml vendored Normal file
View File

@ -0,0 +1,151 @@
name: PR slow CI
on:
pull_request:
paths:
- "src/transformers/models/*/modeling_*.py"
- "tests/**/test_*.py"
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
RUN_PT_TF_CROSS_TESTS: 1
CUDA_VISIBLE_DEVICES: 0,1
jobs:
find_models_to_run:
runs-on: ubuntu-22.04
name: Find models to run slow tests
# Triggered only if the required label `run-slow` is added
if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
outputs:
models: ${{ steps.models_to_run.outputs.models }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: "0"
ref: ${{ github.event.pull_request.head.sha }}
- name: Get commit message
run: |
echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV
- name: Get models to run slow tests
run: |
echo "${{ env.commit_message }}"
python -m pip install GitPython
python utils/pr_slow_ci_models.py --commit_message "${{ env.commit_message }}" | tee output.txt
echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
- name: Models to run slow tests
id: models_to_run
run: |
echo "${{ env.models }}"
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
run_models_gpu:
name: Run all tests for the model
# Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run
# (either a new model PR or via a commit message)
if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
needs: find_models_to_run
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
runs-on:
group: '${{ matrix.machine_type }}'
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo input and matrix info
shell: bash
run: |
echo "${{ matrix.folders }}"
- name: Echo folder ${{ matrix.folders }}
shell: bash
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
# set the artifact folder names (because the character `/` is not allowed).
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'models/'/'models_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Set `machine_type` for report and artifact names
working-directory: /transformers
shell: bash
run: |
echo "${{ matrix.machine_type }}"
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ matrix.machine_type }}
fi
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
echo $CUDA_VISIBLE_DEVICES
python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
- name: Make sure report directory exists
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

View File

@ -1,25 +1,25 @@
name: Self-hosted runner (AMD mi210 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
run_amd_ci:
name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
secrets: inherit
name: Self-hosted runner (AMD mi210 CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
run_amd_ci:
name: AMD mi210
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi210
secrets: inherit

View File

@ -1,25 +1,25 @@
name: Self-hosted runner (AMD mi250 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
run_amd_ci:
name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
secrets: inherit
name: Self-hosted runner (AMD mi250 CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
- "utils/**"
jobs:
run_amd_ci:
name: AMD mi250
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
uses: ./.github/workflows/self-push-amd.yml
with:
gpu_flavor: mi250
secrets: inherit

View File

@ -1,10 +1,10 @@
name: Self-hosted runner (AMD mi300 CI caller)
on:
#workflow_run:
# workflows: ["Self-hosted runner (push-caller)"]
# branches: ["main"]
# types: [completed]
workflow_run:
workflows: ["Self-hosted runner (push-caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_push_ci_caller*

View File

@ -1,55 +1,55 @@
name: Self-hosted runner (AMD mi210 scheduled CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
model-ci:
name: Model CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
name: Self-hosted runner (AMD mi210 scheduled CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi210
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi210
secrets: inherit

View File

@ -1,55 +1,55 @@
name: Self-hosted runner (AMD mi250 scheduled CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
model-ci:
name: Model CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
example-ci:
name: Example CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
name: Self-hosted runner (AMD mi250 scheduled CI caller)
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
branches: ["main"]
types: [completed]
push:
branches:
- run_amd_scheduled_ci_caller*
jobs:
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled-amd.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-amd"
runner: mi250
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi250
secrets: inherit

349
.github/workflows/self-scheduled-amd.yml vendored Normal file
View File

@ -0,0 +1,349 @@
name: Self-hosted runner (scheduled-amd)
# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the
# CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes
# us towards the limit of allowed jobs on GitHub Actions.
on:
workflow_call:
inputs:
job:
required: true
type: string
slack_report_channel:
required: true
type: string
runner:
required: true
type: string
docker:
required: true
type: string
ci_event:
required: true
type: string
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
NUM_SLICES: 2
# Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running.
# This is done so that we avoid parallelizing the scheduled tests, to leave available
# runners for the push CI that is running on the same machine.
jobs:
check_runner_status:
name: Check Runner Status
runs-on: ubuntu-22.04
steps:
- name: Checkout transformers
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check Runner Status
run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
check_runners:
name: Check Runners
needs: check_runner_status
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
setup:
if: contains(fromJSON('["run_models_gpu"]'), inputs.job)
name: Setup
needs: check_runners
strategy:
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: huggingface/transformers-pytorch-amd-gpu
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
steps:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
- name: Cleanup
working-directory: /transformers
run: |
rm -rf tests/__pycache__
rm -rf tests/models/__pycache__
rm -rf reports
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- id: set-matrix
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
run_models_gpu:
if: ${{ inputs.job == 'run_models_gpu' }}
name: Single GPU tests
needs: setup
strategy:
max-parallel: 1 # For now, not to parallelize. Can change later if it works well.
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs_amd.yml
with:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner: ${{ inputs.runner }}
docker: ${{ inputs.docker }}
secrets: inherit
run_pipelines_torch_gpu:
if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
name: PyTorch pipelines
needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all pipeline tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run examples tests on GPU
working-directory: /transformers
run: |
pip install -r examples/pytorch/_tests_requirements.txt
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
run_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
name: Torch ROCm deepspeed tests
needs: check_runners
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
container:
image: ${{ inputs.docker }}
options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
- name: ROCM-SMI
run: |
rocm-smi
- name: ROCM-INFO
run: |
rocminfo | grep "Agent" -A 14
- name: Show ROCR environment
run: |
echo "ROCR: $ROCR_VISIBLE_DEVICES"
- name: Environment
working-directory: /transformers
run: |
python3 utils/print_env.py
- name: Show installed libraries and their versions
working-directory: /transformers
run: pip freeze
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test"
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
send_results:
name: Slack Report
needs: [
check_runner_status,
check_runners,
setup,
run_models_gpu,
run_pipelines_torch_gpu,
run_examples_gpu,
run_torch_cuda_extensions_gpu
]
if: ${{ always() }}
uses: ./.github/workflows/slack-report.yml
with:
job: ${{ inputs.job }}
# This would be `skipped` if `setup` is skipped.
setup_status: ${{ needs.setup.result }}
slack_report_channel: ${{ inputs.slack_report_channel }}
# This would be an empty string if `setup` is skipped.
folder_slices: ${{ needs.setup.outputs.folder_slices }}
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
ci_event: ${{ inputs.ci_event }}
secrets: inherit

View File

@ -366,7 +366,7 @@ jobs:
run: |
python3 -m pip uninstall -y deepspeed
rm -rf DeepSpeed
git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
- name: NVIDIA-SMI

View File

@ -70,7 +70,7 @@ jobs:
with:
name: ci_results_${{ inputs.job }}
path: ci_results_${{ inputs.job }}
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
- name: Send message to Slack for quantization workflow
@ -90,7 +90,7 @@ jobs:
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
@ -98,4 +98,4 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: ci_results_${{ inputs.job }}
path: ci_results_${{ inputs.job }}
path: ci_results_${{ inputs.job }}

View File

@ -16,5 +16,3 @@ jobs:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main
with:
extra_args: --results=verified,unknown

View File

@ -249,43 +249,23 @@ The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/sta
### With pip
This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+.
This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.
You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
First, create a virtual environment with the version of Python you're going to use and activate it.
**macOS/Linux**
```python -m venv env
source env/bin/activate
```
**Windows**
``` python -m venv env
env\Scripts\activate
```
To use 🤗 Transformers, you must install at least one of Flax, PyTorch, or TensorFlow. Refer to the official installation guides for platform-specific commands:
[TensorFlow installation page](https://www.tensorflow.org/install/),
[PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation)
Then, you will need to install at least one of Flax, PyTorch, or TensorFlow.
Please refer to [TensorFlow installation page](https://www.tensorflow.org/install/), [PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) installation pages regarding the specific installation command for your platform.
When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows:
```
```bash
pip install transformers
```
If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source).
```
git clone https://github.com/huggingface/transformers.git
cd transformers
pip install .
```
### With conda
🤗 Transformers can be installed using conda as follows:

View File

@ -15,7 +15,7 @@ to add it.
Keywords: Open-source, LLaMa, GPT-J, instruction, assistant
## [recommenders](https://github.com/recommenders-team/recommenders)
## [recommenders](https://github.com/microsoft/recommenders)
This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. It goes over several aspects required to build efficient recommendation systems: data preparation, modeling, evaluation, model selection & optimization, as well as operationalization
@ -39,15 +39,15 @@ MindsDB is a low-code ML platform, which automates and integrates several ML fra
Keywords: Database, low-code, AI table
## [langchain](https://github.com/langchain-ai/langchain)
## [langchain](https://github.com/hwchase17/langchain)
[langchain](https://github.com/langchain-ai/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
[langchain](https://github.com/hwchase17/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
Keywords: LLMs, Large Language Models, Agents, Chains
## [LlamaIndex](https://github.com/run-llama/llama_index)
## [LlamaIndex](https://github.com/jerryjliu/llama_index)
[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retreival mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
[LlamaIndex](https://github.com/jerryjliu/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retreival mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation
@ -146,9 +146,9 @@ Keywords: Framework, simplicity, NLP
Keywords: LLM, Agents, HF Hub
## [transformers.js](https://github.com/huggingface/transformers.js/)
## [transformers.js](https://xenova.github.io/transformers.js/)
[transformers.js](https://github.com/huggingface/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
[transformers.js](https://xenova.github.io/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
Keywords: Transformers, JavaScript, browser
@ -437,7 +437,7 @@ Keywords: DALL-E, Russian
Keywords: Knowledge Extraction, Knowledge Graphs
## [Nebuly](https://github.com/nebuly-ai/optimate)
## [Nebuly](https://github.com/nebuly-ai/nebuly)
Nebuly is the next-generation platform to monitor and optimize your AI costs in one place. The platform connects to all your AI cost sources (compute, API providers, AI software licenses, etc) and centralizes them in one place to give you full visibility on a model basis. The platform also provides optimization recommendations and a co-pilot model that can guide during the optimization process. The platform builds on top of the open-source tools allowing you to optimize the different steps of your AI stack to squeeze out the best possible cost performances.

View File

@ -1,49 +0,0 @@
# Benchmarks
You might want to add new benchmarks.
You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
The expected function signature is the following:
```py
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
```
## Writing metrics to the database
`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
cf [`llama.py`](./llama.py) to see an example of this in practice.
```py
from benchmarks_entrypoint import MetricsRecorder
import psycopg2
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
# To collect device measurements
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
)
# To collect your model measurements
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
)
```

View File

@ -1,144 +0,0 @@
import argparse
import importlib.util
import logging
import os
from typing import Dict
import psycopg2
import sys
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
register_adapter(dict, Json)
class ImportModuleException(Exception):
pass
class MetricsRecorder:
def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
self.conn = connection
self.conn.autocommit = True
self.logger = logger
self.branch = branch
self.commit_id = commit_id
self.commit_msg = commit_msg
def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
"""
Creates a new benchmark, returns the benchmark id
"""
# gpu_name: str, model_id: str
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(self.branch, self.commit_id, self.commit_msg, metadata),
)
benchmark_id = cur.fetchone()[0]
logger.debug(f"initialised benchmark #{benchmark_id}")
return benchmark_id
def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
"""
Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
"""
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
self.logger.debug(
f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
)
def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
with self.conn.cursor() as cur:
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
measurements,
),
)
self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")
def close(self):
self.conn.close()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
def parse_arguments():
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args.branch, args.commit_id, args.commit_msg
def import_from_path(module_name, file_path):
try:
spec = importlib.util.spec_from_file_location(module_name, file_path)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
except Exception as e:
raise ImportModuleException(f"failed to load python module: {e}")
if __name__ == "__main__":
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
branch, commit_id, commit_msg = parse_arguments()
for entry in os.scandir(benchmarks_folder_path):
try:
if not entry.name.endswith(".py"):
continue
if entry.path == __file__:
continue
logger.debug(f"loading: {entry.name}")
module = import_from_path(entry.name.split(".")[0], entry.path)
logger.info(f"runnning benchmarks in: {entry.name}")
module.run_benchmark(logger, branch, commit_id, commit_msg)
except ImportModuleException as e:
logger.error(e)
except Exception as e:
logger.error(f"error running benchmarks for {entry.name}: {e}")

View File

@ -1,10 +0,0 @@
apiVersion: 1
providers:
- name: 'Transformers Benchmarks'
orgId: 1
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/dashboards

View File

@ -30,7 +30,7 @@
"title": "Go to data",
"tooltip": "Go to data",
"type": "link",
"url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
"url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
}
],
"liveNow": true,
@ -77,7 +77,7 @@
"properties": [
{
"id": "custom.width",
"value": 202
"value": 196
}
]
},
@ -101,7 +101,7 @@
"properties": [
{
"id": "custom.width",
"value": 524
"value": 581
}
]
},
@ -113,19 +113,7 @@
"properties": [
{
"id": "custom.width",
"value": 353
}
]
},
{
"matcher": {
"id": "byName",
"options": "model_id"
},
"properties": [
{
"id": "custom.width",
"value": 216
"value": 379
}
]
}
@ -155,14 +143,12 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"type": "grafana-postgresql-datasource"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -320,14 +306,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -446,14 +431,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -581,14 +565,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -703,14 +686,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -825,14 +807,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -947,14 +928,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1082,14 +1062,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1204,14 +1183,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1326,14 +1304,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1448,14 +1425,13 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
"rawQuery": true,
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
"refId": "A",
"sql": {
"columns": [
@ -1504,7 +1480,11 @@
"id": 15,
"panels": [
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -1548,7 +1528,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1582,9 +1563,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -1685,7 +1665,11 @@
"type": "timeseries"
},
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -1729,7 +1713,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1763,9 +1748,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -1866,7 +1850,11 @@
"type": "timeseries"
},
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -1910,7 +1898,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1944,9 +1933,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -2047,7 +2035,11 @@
"type": "timeseries"
},
{
"datasource": {},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"fieldConfig": {
"defaults": {
"color": {
@ -2091,7 +2083,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -2125,9 +2118,8 @@
"targets": [
{
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
"uid": "bdz2yss7sxo1sc"
},
"editorMode": "code",
"format": "table",
@ -2232,6 +2224,7 @@
"type": "row"
}
],
"refresh": "",
"schemaVersion": 39,
"tags": [],
"templating": {
@ -2243,7 +2236,6 @@
"value": "main"
},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
@ -2256,7 +2248,7 @@
"name": "branch",
"options": [],
"query": "SELECT DISTINCT branch FROM benchmarks;",
"refresh": 1,
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@ -2269,7 +2261,6 @@
"value": "1729701492845"
},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
@ -2290,11 +2281,10 @@
{
"current": {
"selected": false,
"text": "1730393397577",
"value": "1730393397577"
"text": "1730120430069",
"value": "1730120430069"
},
"datasource": {
"default": true,
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
@ -2322,16 +2312,15 @@
"type": "grafana-postgresql-datasource",
"uid": "be28nkzirtb0gd"
},
"definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
"description": "",
"definition": "SELECT DISTINCT gpu_name FROM benchmarks;",
"hide": 0,
"includeAll": false,
"label": "GPU",
"multi": false,
"name": "gpu_name",
"options": [],
"query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
"refresh": 1,
"query": "SELECT DISTINCT gpu_name FROM benchmarks;",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@ -2339,7 +2328,7 @@
},
{
"current": {
"selected": true,
"selected": false,
"text": "10",
"value": "10"
},
@ -2370,6 +2359,6 @@
"timezone": "browser",
"title": "Transformers benchmarks",
"uid": "fdz33iyzln9c0a",
"version": 10,
"version": 4,
"weekStart": ""
}

View File

@ -1,17 +0,0 @@
apiVersion: 1
datasources:
- name: grafana-postgresql-datasource
uid: be28nkzirtb0gd
type: postgres
url: $GRAFANA_POSTGRES_DATASOURCE_URL
user: $GRAFANA_POSTGRES_DATASOURCE_USER
secureJsonData:
password: $GRAFANA_POSTGRES_DATASOURCE_PWD
jsonData:
database: metrics
maxOpenConns: 100
maxIdleConns: 100
maxIdleConnsAuto: true
connMaxLifetime: 14400
postgresVersion: 1000
timescaledb: false

View File

@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS benchmarks (
branch VARCHAR(255),
commit_id VARCHAR(72),
commit_message VARCHAR(70),
metadata jsonb,
gpu_name VARCHAR(255),
created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
);

View File

@ -1,25 +1,71 @@
from logging import Logger
import argparse
import json
import logging
import os
import sys
from statistics import mean
from threading import Event, Thread
from time import perf_counter, sleep
from typing import Optional
from benchmarks_entrypoint import MetricsRecorder
import gpustat
import psutil
import psycopg2
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
from psycopg2.extras import Json
from psycopg2.extensions import register_adapter
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
os.environ["TOKENIZERS_PARALLELISM"] = "1"
torch.set_float32_matmul_precision("high")
register_adapter(dict, Json)
def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
def parse_arguments():
"""
Parse command line arguments for the benchmarking CLI.
"""
parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
parser.add_argument(
"branch",
type=str,
help="The branch name on which the benchmarking is performed.",
)
parser.add_argument(
"commit_id",
type=str,
help="The commit hash on which the benchmarking is performed.",
)
parser.add_argument(
"commit_msg",
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
args = parser.parse_args()
return args.branch, args.commit_id, args.commit_msg
def collect_metrics(benchmark_id, continue_metric_collection):
p = psutil.Process(os.getpid())
conn = psycopg2.connect("dbname=metrics")
cur = conn.cursor()
while not continue_metric_collection.is_set():
with p.oneshot():
cpu_util = p.cpu_percent()
@ -27,41 +73,47 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_util = gpu_stats[0]["utilization.gpu"]
gpu_mem_megabytes = gpu_stats[0]["memory.used"]
metrics_recorder.collect_device_measurements(
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
cur.execute(
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
sleep(0.01)
conn.commit()
conn.close()
def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
continue_metric_collection = Event()
metrics_thread = None
model_id = "meta-llama/Llama-2-7b-hf"
metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
try:
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_name = gpu_stats[0]["name"]
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
metrics_thread = Thread(
target=collect_metrics,
args=[benchmark_id, continue_metric_collection, metrics_recorder],
conn = psycopg2.connect("dbname=metrics")
cur = conn.cursor()
cur.execute(
"INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
(branch, commit_id, commit_msg, gpu_name),
)
conn.commit()
benchmark_id = cur.fetchone()[0]
logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
metrics_thread.start()
logger.info("started background thread to fetch device metrics")
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
device = "cuda"
ckpt = "meta-llama/Llama-2-7b-hf"
logger.info("downloading weights")
# This is to avoid counting download in model load time measurement
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
logger.info("loading model")
start = perf_counter()
model = AutoModelForCausalLM.from_pretrained(
model_id, torch_dtype=torch.float16, generation_config=gen_config
ckpt, torch_dtype=torch.float16, generation_config=gen_config
).eval()
model.to(device)
torch.cuda.synchronize()
@ -69,7 +121,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
model_load_time = end - start
logger.info(f"loaded model in: {model_load_time}s")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(ckpt)
prompt = "Why dogs are so cute?"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
@ -316,27 +368,41 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
metrics_recorder.collect_model_measurements(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
cur.execute(
"""
INSERT INTO model_measurements (
benchmark_id,
measurements
) VALUES (%s, %s)
""",
(
benchmark_id,
{
"model_load_time": model_load_time,
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
"first_eager_generate_time_secs": first_eager_generate_time,
"second_eager_generate_time_secs": second_eager_generate_time,
"time_to_first_token_secs": time_to_first_token,
"time_to_second_token_secs": time_to_second_token,
"time_to_third_token_secs": time_to_third_token,
"time_to_next_token_mean_secs": mean_time_to_next_token,
"first_compile_generate_time_secs": first_compile_generate_time,
"second_compile_generate_time_secs": second_compile_generate_time,
"third_compile_generate_time_secs": third_compile_generate_time,
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
},
),
)
conn.commit()
conn.close()
except Exception as e:
logger.error(f"Caught exception: {e}")
continue_metric_collection.set()
if metrics_thread is not None:
metrics_thread.join()
metrics_recorder.close()
if __name__ == "__main__":
branch, commit_id, commit_msg = parse_arguments()
run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
ARG REF=main

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
RUN echo ${REF}

View File

@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).
ARG PYTORCH='2.6.0'
ARG PYTORCH='2.5.1'
# (not always a valid torch version)
ARG INTEL_TORCH_EXT='2.3.0'
# Example: `cu102`, `cu113`, etc.
@ -65,9 +65,6 @@ RUN python3 -m pip install --no-cache-dir python-Levenshtein
# For `FastSpeech2ConformerTokenizer` tokenizer
RUN python3 -m pip install --no-cache-dir g2p-en
# For Some bitsandbytes tests
RUN python3 -m pip install --no-cache-dir einops
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

View File

@ -8,9 +8,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && python3 -m pip instal
RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y tesseract-ocr
# Torch needs to be installed before deepspeed
# RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
RUN python3 -m pip uninstall -y deepspeed
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN python3 -m pip install -U "itsdangerous<2.1.0"

View File

@ -48,8 +48,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
RUN python3 -m pip uninstall -y deepspeed
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
RUN python3 -m pip install -U "itsdangerous<2.1.0"

View File

@ -1,18 +1,17 @@
FROM rocm/dev-ubuntu-22.04:6.2.4
FROM rocm/dev-ubuntu-22.04:6.0.2
# rocm/pytorch has no version with 2.1.0
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
RUN apt update && \
apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \
apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg && \
apt clean && \
rm -rf /var/lib/apt/lists/*
RUN git lfs install
RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2
RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"
@ -31,5 +30,5 @@ RUN python3 -m pip uninstall -y tensorflow flax
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop
# Remove nvml and nvidia-ml-py as it is not compatible with ROCm. apex is not tested on NVIDIA either.
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
# Remove nvml as it is not compatible with ROCm. apex is not tested on NVIDIA either.
RUN python3 -m pip uninstall py3nvml pynvml apex -y

View File

@ -1,11 +1,11 @@
FROM rocm/dev-ubuntu-22.04:6.2.4
FROM rocm/dev-ubuntu-22.04:5.6
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
ARG PYTORCH='2.5.1'
ARG TORCH_VISION='0.20.0'
ARG TORCH_AUDIO='2.5.0'
ARG ROCM='6.2'
ARG PYTORCH='2.1.1'
ARG TORCH_VISION='0.16.1'
ARG TORCH_AUDIO='2.1.1'
ARG ROCM='5.6'
RUN apt update && \
apt install -y --no-install-recommends \
@ -45,4 +45,4 @@ RUN cd transformers && python3 setup.py develop
RUN python3 -c "from deepspeed.launcher.runner import main"
# Remove nvml as it is not compatible with ROCm
RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
RUN python3 -m pip uninstall py3nvml pynvml -y

View File

@ -1,5 +1,5 @@
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
FROM nvcr.io/nvidia/pytorch:23.11-py3
FROM nvcr.io/nvidia/pytorch:23.04-py3
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive

View File

@ -34,8 +34,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
RUN python3 -m pip uninstall -y deepspeed
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
## For `torchdynamo` tests

View File

@ -11,7 +11,7 @@ ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
# If set to nothing, will install the latest version
ARG PYTORCH='2.6.0'
ARG PYTORCH='2.5.1'
ARG TORCH_VISION=''
ARG TORCH_AUDIO=''
# Example: `cu102`, `cu113`, etc.

View File

@ -50,12 +50,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
# Add aqlm for quantization testing
RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
# Add vptq for quantization testing
RUN python3 -m pip install --no-cache-dir vptq
# Add spqr for quantization testing
RUN python3 -m pip install --no-cache-dir spqr_quant[gpu]
# Add hqq for quantization testing
RUN python3 -m pip install --no-cache-dir hqq
@ -72,10 +66,6 @@ RUN python3 -m pip install --no-cache-dir optimum-quanto
# Add eetq for quantization testing
RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git
# Add flute-kernel and fast_hadamard_transform for quantization testing
RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118
RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

View File

@ -30,26 +30,26 @@
- local: conversations
title: الدردشة مع المحولات
title: البرامج التعليمية
- sections:
- isExpanded: false
sections:
- local: tasks/sequence_classification
title: تصنيف النصوص
- local: tasks/token_classification
title: تصنيف الرموز
- local: tasks/question_answering
title: الإجابة على الأسئلة
- local: tasks/language_modeling
title: نمذجة اللغة السببية
- local: tasks/masked_language_modeling
title: نمذجة اللغة المقنعة
- local: tasks/translation
title: الترجمة
- local: tasks/summarization
title: التلخيص
- local: tasks/multiple_choice
title: الاختيار المتعدد
title: معالجة اللغات الطبيعية
# - sections:
# - isExpanded: false
# sections:
# - local: tasks/sequence_classification
# title: تصنيف النصوص
# - local: tasks/token_classification
# title: تصنيف الرموز
# - local: tasks/question_answering
# title: الإجابة على الأسئلة
# - local: tasks/language_modeling
# title: نمذجة اللغة السببية
# - local: tasks/masked_language_modeling
# title: نمذجة اللغة المقنعة
# - local: tasks/translation
# title: الترجمة
# - local: tasks/summarization
# title: التلخيص
# - local: tasks/multiple_choice
# title: الاختيار المتعدد
# title: معالجة اللغات الطبيعية
# - isExpanded: false
# sections:
# - local: tasks/audio_classification
@ -107,10 +107,10 @@
# - local: tasks/prompting
# title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة
# title: الإرشاد
title: أدلة المهام
# title: أدلة المهام
- sections:
- local: fast_tokenizers
title: استخدم مجزئيات النصوص السريعة من 🤗 Tokenizers
title: استخدم مجزئيات النصوص السريعة من 🤗 Tokenizers
- local: multilingual
title: الاستدلال باستخدام نماذج متعددة اللغات
- local: create_a_model
@ -129,20 +129,16 @@
title: التصدير إلى TFLite
- local: torchscript
title: التصدير إلى TorchScript
- local: benchmarks
title: المعايير
- local: notebooks
title: دفاتر الملاحظات مع الأمثلة
- local: community
title: موارد المجتمع
# - local: community
# title: موارد المجتمع
- local: troubleshooting
title: استكشاف الأخطاء وإصلاحها
- local: gguf
title: التوافق مع ملفات GGUF
- local: tiktoken
title: التوافق مع ملفات TikToken
- local: modular_transformers
title: الوحدات النمطية في `transformers`
- local: how_to_hack_models
title: اختراق النموذج (الكتابة فوق فئة لاستخدامك)
title: أدلة المطورين
# - sections:
# - local: quantization/overview
@ -155,8 +151,6 @@
# title: AWQ
# - local: quantization/aqlm
# title: AQLM
# - local: quantization/vptq
# title: VPTQ
# - local: quantization/quanto
# title: Quanto
# - local: quantization/eetq
@ -881,7 +875,7 @@
# - local: internal/pipelines_utils
# title: مرافق خطوط الأنابيب
# - local: internal/tokenization_utils
# title: مرافق مقسم النصوص
# title: مرافق مقسم النصوص
# - local: internal/trainer_utils
# title: مرافق المدرب
# - local: internal/generation_utils

View File

@ -0,0 +1,352 @@
# معايير الأداء
<Tip warning={true}>
أدوات قياس الأداء من Hugging Face أصبحت قديمة،ويُنصح باستخدام مكتبات خارجية لقياس سرعة وتعقيد الذاكرة لنماذج Transformer.
</Tip>
[[open-in-colab]]
لنلق نظرة على كيفية تقييم أداء نماذج 🤗 Transformers، وأفضل الممارسات، ومعايير الأداء المتاحة بالفعل.
يُمكن العثور على دفتر ملاحظات يشرح بالتفصيل كيفية قياس أداء نماذج 🤗 Transformers [هنا](https://github.com/huggingface/notebooks/tree/main/examples/benchmark.ipynb).
## كيفية قياس أداء نماذج 🤗 Transformers
تسمح الفئتان [`PyTorchBenchmark`] و [`TensorFlowBenchmark`] بتقييم أداء نماذج 🤗 Transformers بمرونة. تتيح لنا فئات التقييم قياس الأداء قياس _الاستخدام الأقصى للذاكرة_ و _الوقت اللازم_ لكل من _الاستدلال_ و _التدريب_.
<Tip>
هنا، ييُعرَّف _الاستدلال_ بأنه تمريرة أمامية واحدة، ويتم تعريف _التدريب_ بأنه تمريرة أمامية واحدة وتمريرة خلفية واحدة.
</Tip>
تتوقع فئات تقييم الأداء [`PyTorchBenchmark`] و [`TensorFlowBenchmark`] كائنًا من النوع [`PyTorchBenchmarkArguments`] و [`TensorFlowBenchmarkArguments`]، على التوالي، للتنفيذ. [`PyTorchBenchmarkArguments`] و [`TensorFlowBenchmarkArguments`] هي فئات بيانات وتحتوي على جميع التكوينات ذات الصلة لفئة تقييم الأداء المقابلة. في المثال التالي، يتم توضيح كيفية تقييم أداء نموذج BERT من النوع _bert-base-cased_.
<frameworkcontent>
<pt>
```py
>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
>>> args = PyTorchBenchmarkArguments(models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512])
>>> benchmark = PyTorchBenchmark(args)
```
</pt>
<tf>
```py
>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments
>>> args = TensorFlowBenchmarkArguments(
... models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> benchmark = TensorFlowBenchmark(args)
```
</tf>
</frameworkcontent>
هنا، يتم تمرير ثلاثة معامﻻت إلى فئات بيانات حجة قياس الأداء، وهي `models` و `batch_sizes` و `sequence_lengths`. المعامل `models` مطلوبة وتتوقع `قائمة` من بمعرّفات النموذج من [مركز النماذج](https://huggingface.co/models) تحدد معامﻻت القائمة `batch_sizes` و `sequence_lengths` حجم `input_ids` الذي يتم قياس أداء النموذج عليه. هناك العديد من المعلمات الأخرى التي يمكن تكوينها عبر فئات بيانات معال قياس الأداء. لمزيد من التفاصيل حول هذه المعلمات، يمكنك إما الرجوع مباشرة إلى الملفات `src/transformers/benchmark/benchmark_args_utils.py`، `src/transformers/benchmark/benchmark_args.py` (لـ PyTorch) و `src/transformers/benchmark/benchmark_args_tf.py` (لـ Tensorflow). أو، بدلاً من ذلك، قم بتشغيل أوامر shell التالية من المجلد الرئيسي لطباعة قائمة وصفية بجميع المعلمات القابلة للتكوين لـ PyTorch و Tensorflow على التوالي.
<frameworkcontent>
<pt>
```bash
python examples/pytorch/benchmarking/run_benchmark.py --help
```
يُمكن ببساطة تشغيل كائن التقييم الذي تم تهيئته عن طريق استدعاء `benchmark.run()`.
```py
>>> results = benchmark.run()
>>> print(results)
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 0.006
google-bert/bert-base-uncased 8 32 0.006
google-bert/bert-base-uncased 8 128 0.018
google-bert/bert-base-uncased 8 512 0.088
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 1227
google-bert/bert-base-uncased 8 32 1281
google-bert/bert-base-uncased 8 128 1307
google-bert/bert-base-uncased 8 512 1539
--------------------------------------------------------------------------------
==================== ENVIRONMENT INFORMATION ====================
- transformers_version: 2.11.0
- framework: PyTorch
- use_torchscript: False
- framework_version: 1.4.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 08:58:43.371351
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</pt>
<tf>
```bash
python examples/tensorflow/benchmarking/run_benchmark_tf.py --help
```
يُمكن بعد ذلك تشغيل كائن قياس الأداء الذي تم تهيئته عن طريق استدعاء `benchmark.run()`.
```py
>>> results = benchmark.run()
>>> print(results)
>>> results = benchmark.run()
>>> print(results)
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 0.005
google-bert/bert-base-uncased 8 32 0.008
google-bert/bert-base-uncased 8 128 0.022
google-bert/bert-base-uncased 8 512 0.105
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 1330
google-bert/bert-base-uncased 8 32 1330
google-bert/bert-base-uncased 8 128 1330
google-bert/bert-base-uncased 8 512 1770
--------------------------------------------------------------------------------
==================== ENVIRONMENT INFORMATION ====================
- transformers_version: 202.11.0
- framework: Tensorflow
- use_xla: False
- framework_version: 2.2.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 09:26:35.617317
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</tf>
</frameworkcontent>
بشكل افتراضي، يتم تقييم _الوقت_ و _الذاكرة المطلوبة_ لـ _الاستدلال_. في مثال المخرجات أعلاه، يُظهر القسمان الأولان النتيجة المقابلة لـ _وقت الاستدلال_ و اكرة الاستدلال_. بالإضافة إلى ذلك، يتم طباعة جميع المعلومات ذات الصلة حول بيئة الحوسبة، على سبيل المثال نوع وحدة معالجة الرسومات (GPU)، والنظام، وإصدارات المكتبة، وما إلى ذلك، في القسم الثالث تحت _معلومات البيئة_. يمكن حفظ هذه المعلومات بشكل اختياري في ملف _.csv_ عند إضافة المعامل `save_to_csv=True` إلى [`PyTorchBenchmarkArguments`] و [`TensorFlowBenchmarkArguments`] على التوالي. في هذه الحالة، يتم حفظ كل قسم في ملف _.csv_ منفصل. يمكن اختيارًا تحديد مسار كل ملف _.csv_ عبر فئات بيانات معامل قياس الأداء.
بدلاً من تقييم النماذج المدربة مسبقًا عبر معرّف النموذج، على سبيل المثال `google-bert/bert-base-uncased`، يُمكن للمستخدم بدلاً من ذلك قياس أداء تكوين عشوائي لأي فئة نموذج متاحة. في هذه الحالة، يجب إدراج "قائمة" من التكوينات مع معامل قياس الأداء كما هو موضح أدناه.
<frameworkcontent>
<pt>
```py
>>> from transformers import PyTorchBenchmark، PyTorchBenchmarkArguments، BertConfig
>>> args = PyTorchBenchmarkArguments(
... models=["bert-base"، "bert-384-hid"، "bert-6-lay"]، batch_sizes=[8]، sequence_lengths=[8، 32، 128، 512]
... )
>>> config_base = BertConfig()
>>> config_384_hid = BertConfig(hidden_size=384)
>>> config_6_lay = BertConfig(num_hidden_layers=6)
>>> benchmark = PyTorchBenchmark(args، configs=[config_base، config_384_hid، config_6_lay])
>>> benchmark.run()
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
bert-base 8 128 0.006
bert-base 8 512 0.006
bert-base 8 128 0.018
bert-base 8 512 0.088
bert-384-hid 8 8 0.006
bert-384-hid 8 32 0.006
bert-384-hid 8 128 0.011
bert-384-hid 8 512 0.054
bert-6-lay 8 8 0.003
bert-6-lay 8 32 0.004
bert-6-lay 8 128 0.009
bert-6-lay 8 512 0.044
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
## نتائج اختبار الأداء
في هذا القسم، يتم قياس _وقت الاستدلال_ و _الذاكرة المطلوبة_ للاستدلال، لمختلف تكوينات `BertModel`. يتم عرض النتائج في جدول، مع تنسيق مختلف قليلاً لكل من PyTorch و TensorFlow.
--------------------------------------------------------------------------------
| اسم النموذج | حجم الدفعة | طول التسلسل | الذاكرة بالميغابايت |
--------------------------------------------------------------------------------
| bert-base | 8 | 8 | 1277 |
| bert-base | 8 | 32 | 1281 |
| bert-base | 8 | 128 | 1307 |
| bert-base | 8 | 512 | 1539 |
| bert-384-hid | 8 | 8 | 1005 |
| bert-384-hid | 8 | 32 | 1027 |
| bert-384-hid | 8 | 128 | 1035 |
| bert-384-hid | 8 | 512 | 1255 |
| bert-6-lay | 8 | 8 | 1097 |
| bert-6-lay | 8 | 32 | 1101 |
| bert-6-lay | 8 | 128 | 1127 |
| bert-6-lay | 8 | 512 | 1359 |
--------------------------------------------------------------------------------
==================== معلومات البيئة ====================
- transformers_version: 2.11.0
- framework: PyTorch
- use_torchscript: False
- framework_version: 1.4.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 09:35:25.143267
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</pt>
<tf>
```py
>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig
>>> args = TensorFlowBenchmarkArguments(
... models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> config_base = BertConfig()
>>> config_384_hid = BertConfig(hidden_size=384)
>>> config_6_lay = BertConfig(num_hidden_layers=6)
>>> benchmark = TensorFlowBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
>>> benchmark.run()
==================== نتائج السرعة في الاستدلال ====================
--------------------------------------------------------------------------------
| اسم النموذج | حجم الدفعة | طول التسلسل | الوقت بالثانية |
--------------------------------------------------------------------------------
| bert-base | 8 | 8 | 0.005 |
| bert-base | 8 | 32 | 0.008 |
| bert-base | 8 | 128 | 0.022 |
| bert-base | 8 | 512 | 0.106 |
| bert-384-hid | 8 | 8 | 0.005 |
| bert-384-hid | 8 | 32 | 0.007 |
| bert-384-hid | 8 | 128 | 0.018 |
| bert-384-hid | 8 | 512 | 0.064 |
| bert-6-lay | 8 | 8 | 0.002 |
| bert-6-lay | 8 | 32 | 0.003 |
| bert-6-lay | 8 | 128 | 0.0011 |
| bert-6-lay | 8 | 512 | 0.074 |
--------------------------------------------------------------------------------
==================== نتائج الذاكرة في الاستدلال ====================
--------------------------------------------------------------------------------
| اسم النموذج | حجم الدفعة | طول التسلسل | الذاكرة بالميغابايت |
--------------------------------------------------------------------------------
| اسم النموذج | حجم الدفعة | طول التسلسل | الذاكرة بالميغابايت |
--------------------------------------------------------------------------------
| bert-base | 8 | 8 | 1330 |
| bert-base | 8 | 32 | 1330 |
| bert-base | 8 | 128 | 1330 |
| bert-base | 8 | 512 | 1770 |
| bert-384-hid | 8 | 8 | 1330 |
| bert-384-hid | 8 | 32 | 1330 |
| bert-384-hid | 8 | 128 | 1330 |
| bert-384-hid | 8 | 512 | 1540 |
| bert-6-lay | 8 | 8 | 1330 |
| bert-6-lay | 8 | 32 | 1330 |
| bert-6-lay | 8 | 128 | 1330 |
| bert-6-lay | 8 | 512 | 1540 |
--------------------------------------------------------------------------------
==================== معلومات البيئة ====================
- transformers_version: 2.11.0
- framework: Tensorflow
- use_xla: False
- framework_version: 2.2.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 09:38:15.487125
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</tf>
</frameworkcontent>
مرة أخرى، يتم قياس _وقت الاستدلال_ و _الذاكرة المطلوبة_ للاستدلال، ولكن هذه المرة لتكوينات مخصصة لـ `BertModel`. يمكن أن تكون هذه الميزة مفيدة بشكل خاص عند اتخاذ قرار بشأن التكوين الذي يجب تدريب النموذج عليه.
## أفضل الممارسات في اختبار الأداء
يسرد هذا القسم بعض أفضل الممارسات التي يجب مراعاتها عند إجراء اختبار الأداء لنموذج ما.
- حالياً، يتم دعم اختبار الأداء على جهاز واحد فقط. عند إجراء الاختبار على وحدة معالجة الرسوميات (GPU)، يوصى بأن يقوم المستخدم بتحديد الجهاز الذي يجب تشغيل التعليمات البرمجية عليه من خلال تعيين متغير البيئة `CUDA_VISIBLE_DEVICES` في الشل، على سبيل المثال `export CUDA_VISIBLE_DEVICES=0` قبل تشغيل التعليمات البرمجية.
- يجب تعيين الخيار `no_multi_processing` إلى `True` فقط لأغراض الاختبار والتصحيح. ولضمان قياس الذاكرة بدقة، يوصى بتشغيل كل اختبار ذاكرة في عملية منفصلة والتأكد من تعيين `no_multi_processing` إلى `True`.
- يجب دائمًا ذكر معلومات البيئة عند مشاركة نتائج تقييم النموذج. يُمكن أن تختلف النتائج اختلافًا كبيرًا بين أجهزة GPU المختلفة وإصدارات المكتبات، وما إلى ذلك، لذلك فإن نتائج الاختبار بمفردها ليست مفيدة جدًا للمجتمع.
## مشاركة نتائج اختبار الأداء الخاص بك
في السابق، تم إجراء اختبار الأداء لجميع النماذج الأساسية المتاحة (10 في ذلك الوقت) لقياس _وقت الاستدلال_، عبر العديد من الإعدادات المختلفة: باستخدام PyTorch، مع TorchScript وبدونها، باستخدام TensorFlow، مع XLA وبدونه. تم إجراء جميع هذه الاختبارات على وحدات المعالجة المركزية (CPU) (باستثناء XLA TensorFlow) ووحدات معالجة الرسوميات (GPU).
يتم شرح هذا النهج بالتفصيل في [منشور المدونة هذا](https://medium.com/huggingface/benchmarking-transformers-pytorch-and-tensorflow-e2917fb891c2) وتتوفر النتائج [هنا](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing).
مع أدوات اختبار الأداء الجديدة، أصبح من الأسهل من أي وقت مضى مشاركة نتائج اختبار الأداء الخاص بك مع المجتمع:
- [نتائج اختبار الأداء في PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/benchmarking/README.md).
- [نتائج اختبار الأداء في TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/benchmarking/README.md).

View File

@ -1,66 +0,0 @@
# مجتمع المطورين
هذه الصفحة تجمع الموارد حول 🤗 Transformers التي طورها المجتمع.
## موارد المجتمع:
| المصدر | الوصف | المؤلف |
|:----------|:-------------|------:|
| [Hugging Face Transformers Glossary Flashcards](https://www.darigovresearch.com/huggingface-transformers-glossary-flashcards) | مجموعة من البطاقات التعليمية القائمة على [Transformers Docs Glossary](glossary) والتي تم وضعها في شكل يمكن تعلمه/مراجعته بسهولة باستخدام [Anki](https://apps.ankiweb.net/) وهو تطبيق مفتوح المصدر متعدد المنصات مصمم خصيصًا للاحتفاظ بالمعرفة على المدى الطويل. شاهد هذا [فيديو تمهيدي حول كيفية استخدام البطاقات التعليمية](https://www.youtube.com/watch?v=Dji_7PILrw). | [Darigov Research](https://www.darigovresearch.com/) |
## دفاتر ملاحظات المجتمع:
| الدفتر | الوصف | المؤلف | |
|:----------|:-------------|:-------------|------:|
| [Fine-tune a pre-trained Transformer to generate lyrics](https://github.com/AlekseyKorshuk/huggingartists) | كيفية توليد كلمات الأغاني على غرار فنانك المفضل من خلال ضبط نموذج GPT-2 | [Aleksey Korshuk](https://github.com/AlekseyKorshuk) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AlekseyKorshuk/huggingartists/blob/master/huggingartists-demo.ipynb) |
| [Train T5 in Tensorflow 2](https://github.com/snapthat/TF-T5-text-to-text) | كيفية تدريب T5 لأي مهمة باستخدام Tensorflow 2. يوضح هذا الدفتر مهمة السؤال والجواب المنفذة في Tensorflow 2 باستخدام SQUAD | [Muhammad Harris](https://github.com/HarrisDePerceptron) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snapthat/TF-T5-text-to-text/blob/master/snapthatT5/notebooks/TF-T5-Datasets%20Training.ipynb) |
| [Train T5 on TPU](https://github.com/patil-suraj/exploring-T5/blob/master/T5_on_TPU.ipynb) | كيفية تدريب T5 على SQUAD مع Transformers و Nlp | [Suraj Patil](https://github.com/patil-suraj) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/T5_on_TPU.ipynb#scrollTo=QLGiFCDqvuil) |
| [Fine-tune T5 for Classification and Multiple Choice](https://github.com/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb) | كيفية ضبط نموذج T5 للتصنيف والمهام متعددة الخيارات باستخدام تنسيق النص إلى نص مع PyTorch Lightning | [Suraj Patil](https://github.com/patil-suraj) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb) |
| [Fine-tune DialoGPT on New Datasets and Languages](https://github.com/ncoop57/i-am-a-nerd/blob/master/_notebooks/2020-05-12-chatbot-part-1.ipynb) | كيفية ضبط نموذج DialoGPT على مجموعة بيانات جديدة لروبوتات الدردشة المحادثية المفتوحة | [Nathan Cooper](https://github.com/ncoop57) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ncoop57/i-am-a-nerd/blob/master/_notebooks/2020-05-12-chatbot-part-1.ipynb) |
| [Long Sequence Modeling with Reformer](https://github.com/patrickvonplaten/notebooks/blob/master/PyTorch_Reformer.ipynb) | كيفية التدريب على تسلسلات طويلة تصل إلى 500,000 رمز باستخدام Reformer | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/PyTorch_Reformer.ipynb) |
| [Fine-tune BART for Summarization](https://github.com/ohmeow/ohmeow_website/blob/master/posts/2021-05-25-mbart-sequence-classification-with-blurr.ipynb) | كيفية ضبط نموذج BART للتلخيص باستخدام fastai باستخدام blurr | [Wayde Gilliam](https://ohmeow.com/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ohmeow/ohmeow_website/blob/master/posts/2021-05-25-mbart-sequence-classification-with-blurr.ipynb) |
| [Fine-tune a pre-trained Transformer on anyone's tweets](https://colab.research.google.com/github/borisdayma/huggingtweets/blob/master/huggingtweets-demo.ipynb) | كيفية توليد تغريدات على غرار حساب Twitter المفضل لديك من خلال ضبط نموذج GPT-2 | [Boris Dayma](https://github.com/borisdayma) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/borisdayma/huggingtweets/blob/master/huggingtweets-demo.ipynb) |
| [Optimize 🤗 Hugging Face models with Weights & Biases](https://colab.research.google.com/github/wandb/examples/blob/master/colabs/huggingface/Optimize_Hugging_Face_models_with_Weights_%26_Biases.ipynb) | دليل كامل لعرض تكامل W&B مع Hugging Face | [Boris Dayma](https://github.com/borisdayma) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/examples/blob/master/colabs/huggingface/Optimize_Hugging_Face_models_with_Weights_%26_Biases.ipynb) |
| [Pretrain Longformer](https://github.com/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) | كيفية بناء نسخة "طويلة" من النماذج المسبقة التدريب الموجودة | [Iz Beltagy](https://beltagy.net) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) |
| [Fine-tune Longformer for QA](https://github.com/patil-suraj/Notebooks/blob/master/longformer_qa_training.ipynb) | كيفية ضبط نموذج Longformer لمهمة QA | [Suraj Patil](https://github.com/patil-suraj) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/Notebooks/blob/master/longformer_qa_training.ipynb) |
| [Evaluate Model with 🤗nlp](https://github.com/patrickvonplaten/notebooks/blob/master/How_to_evaluate_Longformer_on_TriviaQA_using_NLP.ipynb) | كيفية تقييم نموذج Longformer على TriviaQA مع `nlp` | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1m7eTGlPmLRgoPkkA7rkhQdZ9ydpmsdLE?usp=sharing) |
| [Fine-tune T5 for Sentiment Span Extraction](https://github.com/enzoampil/t5-intro/blob/master/t5_qa_training_pytorch_span_extraction.ipynb) | كيفية ضبط نموذج T5 لاستخراج المشاعر باستخدام تنسيق النص إلى نص مع PyTorch Lightning | [Lorenzo Ampil](https://github.com/enzoampil) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/enzoampil/t5-intro/blob/master/t5_qa_training_pytorch_span_extraction.ipynb) |
| [Fine-tune DistilBert for Multiclass Classification](https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_multiclass_classification.ipynb) | كيفية ضبط نموذج DistilBert للتصنيف متعدد الفئات باستخدام PyTorch | [Abhishek Kumar Mishra](https://github.com/abhimishra91) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_multiclass_classification.ipynb)|
|[Fine-tune BERT for Multi-label Classification](https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_multi_label_classification.ipynb)|كيفية ضبط نموذج BERT للتصنيف متعدد التصنيفات باستخدام PyTorch|[Abhishek Kumar Mishra](https://github.com/abhimishra91) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_multi_label_classification.ipynb)|
|[Fine-tune T5 for Summarization](https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb)|كيفية ضبط نموذج T5 للتلخيص في PyTorch وتتبع التجارب باستخدام WandB|[Abhishek Kumar Mishra](https://github.com/abhimishra91) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb)|
|[Speed up Fine-Tuning in Transformers with Dynamic Padding / Bucketing](https://github.com/ELS-RD/transformers-notebook/blob/master/Divide_Hugging_Face_Transformers_training_time_by_2_or_more.ipynb)|كيفية تسريع الضبط الدقيق بعامل 2 باستخدام الضبط الديناميكي/التقسيم|[Michael Benesty](https://github.com/pommedeterresautee) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CBfRU1zbfu7-ijiOqAAQUA-RJaxfcJoO?usp=sharing)|
|[Pretrain Reformer for Masked Language Modeling](https://github.com/patrickvonplaten/notebooks/blob/master/Reformer_For_Masked_LM.ipynb)| كيفية تدريب نموذج Reformer مع طبقات الانتباه ثنائية الاتجاه | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tzzh0i8PgDQGV3SMFUGxM7_gGae3K-uW?usp=sharing)|
|[Expand and Fine Tune Sci-BERT](https://github.com/lordtt13/word-embeddings/blob/master/COVID-19%20Research%20Data/COVID-SciBERT.ipynb)| كيفية زيادة مفردات نموذج SciBERT المسبق التدريب من AllenAI على مجموعة بيانات CORD وإنشاء خط أنابيب لها. | [Tanmay Thakur](https://github.com/lordtt13) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1rqAR40goxbAfez1xvF3hBJphSCsvXmh8)|
|[Fine Tune BlenderBotSmall for Summarization using the Trainer API](https://github.com/lordtt13/transformers-experiments/blob/master/Custom%20Tasks/fine-tune-blenderbot_small-for-summarization.ipynb)| كيفية ضبط نموذج BlenderBotSmall للتلخيص على مجموعة بيانات مخصصة، باستخدام واجهة برمجة التطبيقات Trainer. | [Tanmay Thakur](https://github.com/lordtt13) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/19Wmupuls7mykSGyRN_Qo6lPQhgp56ymq?usp=sharing)|
|[Fine-tune Electra and interpret with Integrated Gradients](https://github.com/elsanns/xai-nlp-notebooks/blob/master/electra_fine_tune_interpret_captum_ig.ipynb) | كيفية ضبط نموذج Electra للتحليل العاطفي وتفسير التنبؤات باستخدام Captum Integrated Gradients | [Eliza Szczechla](https://elsanns.github.io) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elsanns/xai-nlp-notebooks/blob/master/electra_fine_tune_interpret_captum_ig.ipynb)|
|[fine-tune a non-English GPT-2 Model with Trainer class](https://github.com/philschmid/fine-tune-GPT-2/blob/master/Fine_tune_a_non_English_GPT_2_Model_with_Huggingface.ipynb) | كيفية ضبط نموذج GPT-2 غير الإنجليزي باستخدام فئة Trainer | [Philipp Schmid](https://www.philschmid.de) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/philschmid/fine-tune-GPT-2/blob/master/Fine_tune_a_non_English_GPT_2_Model_with_Huggingface.ipynb)|
|[Fine-tune a DistilBERT Model for Multi Label Classification task](https://github.com/DhavalTaunk08/Transformers_scripts/blob/master/Transformers_multilabel_distilbert.ipynb) | كيفية ضبط نموذج DistilBERT لمهمة التصنيف متعدد التصنيفات | [Dhaval Taunk](https://github.com/DhavalTaunk08) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DhavalTaunk08/Transformers_scripts/blob/master/Transformers_multilabel_distilbert.ipynb)|
|[Fine-tune ALBERT for sentence-pair classification](https://github.com/NadirEM/nlp-notebooks/blob/master/Fine_tune_ALBERT_sentence_pair_classification.ipynb) | كيفية ضبط نموذج ALBERT أو أي نموذج آخر قائم على BERT لمهمة التصنيف المزدوج للجمل | [Nadir El Manouzi](https://github.com/NadirEM) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NadirEM/nlp-notebooks/blob/master/Fine_tune_ALBERT_sentence_pair_classification.ipynb)|
|[Fine-tune Roberta for sentiment analysis](https://github.com/DhavalTaunk08/NLP_scripts/blob/master/sentiment_analysis_using_roberta.ipynb) | كيفية ضبط نموذج Roberta للتحليل العاطفي | [Dhaval Taunk](https://github.com/DhavalTaunk08) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DhavalTaunk08/NLP_scripts/blob/master/sentiment_analysis_using_roberta.ipynb)|
|[Evaluating Question Generation Models](https://github.com/flexudy-pipe/qugeev) | ما مدى دقة الإجابات على الأسئلة التي يولدها نموذجك التحويلي seq2seq؟ | [Pascal Zoleko](https://github.com/zolekode) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1bpsSqCQU-iw_5nNoRm_crPq6FRuJthq_?usp=sharing)|
|[Classify text with DistilBERT and Tensorflow](https://github.com/peterbayerle/huggingface_notebook/blob/main/distilbert_tf.ipynb) | كيفية ضبط نموذج DistilBERT للتصنيف النصي في TensorFlow | [Peter Bayerle](https://github.com/peterbayerle) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/peterbayerle/huggingface_notebook/blob/main/distilbert_tf.ipynb)|
|[Leverage BERT for Encoder-Decoder Summarization on CNN/Dailymail](https://github.com/patrickvonplaten/notebooks/blob/master/BERT2BERT_for_CNN_Dailymail.ipynb) | كيفية البدء السريع لنموذج *EncoderDecoderModel* مع نقطة تفتيش *google-bert/bert-base-uncased* للتلخيص على CNN/Dailymail | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/BERT2BERT_for_CNN_Dailymail.ipynb)|
|[Leverage RoBERTa for Encoder-Decoder Summarization on BBC XSum](https://github.com/patrickvonplaten/notebooks/blob/master/RoBERTaShared_for_BBC_XSum.ipynb) | كيفية البدء السريع لنموذج *EncoderDecoderModel* المشترك مع نقطة تفتيش *FacebookAI/roberta-base* للتلخيص على BBC/XSum | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/RoBERTaShared_for_BBC_XSum.ipynb)|
|[Fine-tune TAPAS on Sequential Question Answering (SQA)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb) | كيفية ضبط نموذج *TapasForQuestionAnswering* مع نقطة تفتيش *tapas-base* على مجموعة بيانات Sequential Question Answering (SQA) | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb)|
|[Evaluate TAPAS on Table Fact Checking (TabFact)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Evaluating_TAPAS_on_the_Tabfact_test_set.ipynb) | كيفية تقييم نموذج *TapasForSequenceClassification* المضبوط مسبقًا مع نقطة تفتيش *tapas-base-finetuned-tabfact* باستخدام مزيج من مكتبتي 🤗 datasets و 🤗 transformers | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Evaluating_TAPAS_on_the_Tabfact_test_set.ipynb)|
|[Fine-tuning mBART for translation](https://colab.research.google.com/github/vasudevgupta7/huggingface-tutorials/blob/main/translation_training.ipynb) | كيفية ضبط نموذج mBART باستخدام Seq2SeqTrainer للترجمة من الهندية إلى الإنجليزية | [Vasudev Gupta](https://github.com/vasudevgupta7) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vasudevgupta7/huggingface-tutorials/blob/main/translation_training.ipynb)|
|[Fine-tune LayoutLM on FUNSD (a form understanding dataset)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForTokenClassification_on_FUNSD.ipynb) | كيفية ضبط نموذج *LayoutLMForTokenClassification* على مجموعة بيانات FUNSD لاستخراج المعلومات من المستندات الممسوحة ضوئيًا | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForTokenClassification_on_FUNSD.ipynb)|
|[Fine-Tune DistilGPT2 and Generate Text](https://colab.research.google.com/github/tripathiaakash/DistilGPT2-Tutorial/blob/main/distilgpt2_fine_tuning.ipynb) | كيفية ضبط نموذج DistilGPT2 وتوليد النص | [Aakash Tripathi](https://github.com/tripathiaakash) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tripathiaakash/DistilGPT2-Tutorial/blob/main/distilgpt2_fine_tuning.ipynb)|
|[Fine-Tune LED on up to 8K tokens](https://github.com/patrickvonplaten/notebooks/blob/master/Fine_tune_Longformer_Encoder_Decoder_(LED)_for_Summarization_on_pubmed.ipynb) | كيفية ضبط نموذج LED على pubmed للتلخيص طويل المدى | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Fine_tune_Longformer_Encoder_Decoder_(LED)_for_Summarization_on_pubmed.ipynb)|
|[Evaluate LED on Arxiv](https://github.com/patrickvonplaten/notebooks/blob/master/LED_on_Arxiv.ipynb) | كيفية تقييم نموذج LED للتلخيص طويل المدى بشكل فعال | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/LED_on_Arxiv.ipynb)|
|[Fine-tune LayoutLM on RVL-CDIP (a document image classification dataset)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForSequenceClassification_on_RVL_CDIP.ipynb) | كيفية ضبط نموذج *LayoutLMForSequenceClassification* على مجموعة بيانات RVL-CDIP لتصنيف المستندات الممسوحة ضوئيًا | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForSequenceClassification_on_RVL_CDIP.ipynb)|
|[Wav2Vec2 CTC decoding with GPT2 adjustment](https://github.com/voidful/huggingface_notebook/blob/main/xlsr_gpt.ipynb) | كيفية فك تشفير تسلسل CTC مع تعديل نموذج اللغة | [Eric Lam](https://github.com/voidful) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1e_zQHYbO2YKEaUgzb1ww1WwiAyydAj?usp=sharing)|
|[Fine-tune BART for summarization in two languages with Trainer class](https://github.com/elsanns/xai-nlp-notebooks/blob/master/fine_tune_bart_summarization_two_langs.ipynb) | كيفية ضبط نموذج BART للتلخيص بلغتين باستخدام فئة Trainer | [Eliza Szczechla](https://github.com/elsanns) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elsanns/xai-nlp-notebooks/blob/master/fine_tune_bart_summarization_two_langs.ipynb)|
|[Evaluate Big Bird on Trivia QA](https://github.com/patrickvonplaten/notebooks/blob/master/Evaluating_Big_Bird_on_TriviaQA.ipynb) | كيفية تقييم نموذج BigBird للأسئلة والأجوبة على وثائق طويلة على Trivia QA | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Evaluating_Big_Bird_on_TriviaQA.ipynb)|
| [Create video captions using Wav2Vec2](https://github.com/Muennighoff/ytclipcc/blob/main/wav2vec_youtube_captions.ipynb) | كيفية إنشاء تعليقات توضيحية على YouTube من أي فيديو من خلال تفريغ الصوت باستخدام Wav2Vec | [Niklas Muennighoff](https://github.com/Muennighoff) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Muennighoff/ytclipcc/blob/main/wav2vec_youtube_captions.ipynb) |
| [Fine-tune the Vision Transformer on CIFAR-10 using PyTorch Lightning](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_PyTorch_Lightning.ipynb) | كيفية ضبط نموذج Vision Transformer (ViT) على CIFAR-10 باستخدام مكتبات HuggingFace Transformers و Datasets و PyTorch Lightning | [Niels Rogge](https://github.com/nielsrogge) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_PyTorch_Lightning.ipynb) |
| [Fine-tune the Vision Transformer on CIFAR-10 using the 🤗 Trainer](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_%F0%9F%A4%97_Trainer.ipynb) | كيفية ضبط نموذج Vision Transformer (ViT) على CIFAR-10 باستخدام مكتبات HuggingFace Transformers و Datasets و 🤗 Trainer | [Niels Rogge](https://github.com/nielsrogge) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_%F0%9F%A4%97_Trainer.ipynb) |
| [Evaluate LUKE on Open Entity, an entity typing dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_open_entity.ipynb) | كيفية تقييم نموذج *LukeForEntityClassification* على مجموعة بيانات Open Entity | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_open_entity.ipynb) |
| [Evaluate LUKE on TACRED, a relation extraction dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) | كيفية تقييم نموذج *LukeForEntityPairClassification* على مجموعة بيانات TACRED | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) |
| [Evaluate LUKE on CoNLL-2003, an important NER benchmark](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) | كيفية تقييم نموذج *LukeForEntitySpanClassification* على مجموعة بيانات CoNLL-2003 | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) |
| [Evaluate BigBird-Pegasus on PubMed dataset](https://github.com/vasudevgupta7/bigbird/blob/main/notebooks/bigbird_pegasus_evaluation.ipynb) | كيفية تقييم نموذج *BigBirdPegasusForConditionalGeneration* على مجموعة بيانات PubMed | [Vasudev Gupta](https://github.com/vasudevgupta7) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vasudevgupta7/bigbird/blob/main/notebooks/bigbird_pegasus_evaluation.ipynb) |
| [Speech Emotion Classification with Wav2Vec2](https://github.com/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) | كيفية استخدام نموذج Wav2Vec2 المسبق التدريب لتصنيف المشاعر على مجموعة بيانات MEGA | [Mehrdad Farahani](https://github.com/m3hrdadfi) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) |
| [Detect objects in an image with DETR](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_minimal_example_(with_DetrFeatureExtractor).ipynb) | كيفية استخدام نموذج *DetrForObjectDetection* المدرب للكشف عن الأجسام في صورة وتصوير الانتباه | [Niels Rogge](https://github.com/NielsRogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_minimal_example_(with_DetrFeatureExtractor).ipynb) |
| [Fine-tune DETR on a custom object detection dataset](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb) | كيفية ضبط نموذج *DetrForObjectDetection* على مجموعة بيانات الكشف عن الأجسام المخصصة | [Niels Rogge](https://github.com/NielsRogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb) |
| [Finetune T5 for Named Entity Recognition](https://github.com/ToluClassics/Notebooks/blob/main/T5_Ner_Finetuning.ipynb) | كيفية ضبط نموذج *T5* على مهمة التعرف على الكيانات المسماة | [Ogundepo Odunayo](https://github.com/ToluClassics) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1obr78FY_cBmWY5ODViCmzdY6O1KB65Vc?usp=sharing) |
| [Fine-Tuning Open-Source LLM using QLoRA with MLflow and PEFT](https://github.com/mlflow/mlflow/blob/master/docs/source/llms/transformers/tutorials/fine-tuning/transformers-peft.ipynb) | كيفية استخدام [QLoRA](https://github.com/artidoro/qlora) و [PEFT](https://huggingface.co/docs/peft/en/index) لضبط نموذج LLM بطريقة فعالة من حيث الذاكرة، مع استخدام [MLflow](https://mlflow.org/docs/latest/llms/transformers/index.html) لإدارة تتبع التجارب | [Yuki Watanabe](https://github.com/B-Step62) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mlflow/mlflow/blob/master/docs/source/llms/transformers/tutorials/fine-tuning/transformers-peft.ipynb) |

View File

@ -1,163 +0,0 @@
# كيفية تعديل أي نموذج من نماذج Transformers
توفر مكتبة [🤗 Transformers](https://github.com/huggingface/transformers) مجموعة من النماذج المسبقة التدريب والأدوات لمعالجة اللغات الطبيعية، والرؤية، وما إلى ذلك. على الرغم من أن هذه النماذج تغطي مجموعة واسعة من التطبيقات، فقد تواجه حالات استخدام لا تدعمها المكتبة بشكل افتراضي. يُمكن للتخصيص أن يفتح إمكانيات جديدة، مثل إضافة طبقات جديدة، أو تعديل البنية المعمارية، أو تحسين آليات الانتباه. سيُوضح لك هذا الدليل كيفية تعديل نماذج Transformers الموجودة لتلبية احتياجاتك المحددة. الشيء الرائع هو أنك لست بحاجة إلى الخروج من إطار عمل Transformers لإجراء هذه التغييرات. ي يمكنك تعديل النماذج مباشرةً في Transformers والاستفادة من الميزات مثل [واجهة برمجة التطبيقات Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer)، و [PreTrainedModel](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel)، والضبط الدقيق الفعال باستخدام أدوات مثل [PEFT](https://huggingface.co/docs/peft/index).
سنرشدك في هذا الدليل لكيفية تخصيص نماذج Transformers الموجودة لتلبية متطلباتك، دون فقدان مزايا الإطار. ستتعلم كيفية:
- تعديل بنية نموذج ما من خلال تغيير آلية الانتباه الخاصة به.
- تطبيق تقنيات مثل Low-Rank Adaptation (LoRA) على مكونات نموذج محددة.
نحن نشجعك على المساهمة باختراقاتك الخاصة ومشاركتها هنا مع المجتمع1
## مثال: تعديل آلية الانتباه في نموذج Segment Anything (SAM)
نموذج **Segment Anything (SAM)** هو نموذج رائد في مجال تجزئة الصور. في تنفيذه الافتراضي، يستخدم SAM إسقاطًا مجمعًا للاستعلام والمفتاح والقيمة (`qkv`) في آلية الانتباه الخاصة به. ومع ذلك، قد ترغب في ضبط مكونات محددة فقط من آلية الانتباه، مثل إسقاطات الاستعلام (`q`) والقيمة (`v`)، لتقليل عدد المعلمات القابلة للتدريب والموارد الحسابية المطلوبة.
### الدافع
من خلال تقسيم الإسقاط المجمع `qkv` إلى إسقاطات منفصلة `q` و `k` و `v`، يمكنك تطبيق تقنيات مثل **LoRA** (Low-Rank Adaptation) على إسقاطي `q` و `v` فقط. يسمح لك هذا بما يلي:
- ضبط عدد أقل من المعلمات، مما يقلل من العبء الحسابي.
- تحقيق أداء أفضل من خلال التركيز على مكونات محددة.
- تجربة استراتيجيات تعديل مختلفة في آلية الانتباه.
### التنفيذ
#### **الخطوة 1: إنشاء فئة اهتمام مخصصة**
بعد ذلك، قم بإنشاء فئة فرعية من فئة `SamVisionAttention` الأصلية وعدلها لتضم إسقاطات `q` و `k` و `v` منفصلة.
```python
import torch
import torch.nn as nn
from transformers.models.sam.modeling_sam import SamVisionAttention
class SamVisionAttentionSplit(SamVisionAttention, nn.Module):
def __init__(self, config, window_size):
super().__init__(config, window_size)
del self.qkv
# إسقاطات منفصلة q و k و v
self.q = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
self.k = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
self.v = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
self._register_load_state_dict_pre_hook(self.split_q_k_v_load_hook)
def split_q_k_v_load_hook(self, state_dict, prefix, *args):
keys_to_delete = []
for key in list(state_dict.keys()):
if "qkv." in key:
# تقسيم q و k و v من الإسقاط المجمع
q, k, v = state_dict[key].chunk(3, dim=0)
# استبدال الإسقاطات الفردية q و k و v
state_dict[key.replace("qkv.", "q.")] = q
state_dict[key.replace("qkv.", "k.")] = k
state_dict[key.replace("qkv.", "v.")] = v
# وضع علامة على مفتاح qkv القديم للحذف
keys_to_delete.append(key)
# حذف مفاتيح qkv القديمة
for key in keys_to_delete:
del state_dict[key]
def forward(self, hidden_states: torch.Tensor, output_attentions=False) -> torch.Tensor:
batch_size, height, width, _ = hidden_states.shape
qkv_shapes = (batch_size * self.num_attention_heads, height * width, -1)
query = self.q(hidden_states).reshape((batch_size, height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
key = self.k(hidden_states).reshape((batch_size, height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
value = self.v(hidden_states).reshape((batch_size, height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
attn_weights = (query * self.scale) @ key.transpose(-2, -1)
if self.use_rel_pos:
attn_weights = self.add_decomposed_rel_pos(
attn_weights, query, self.rel_pos_h, self.rel_pos_w, (height, width), (height, width)
)
attn_weights = torch.nn.functional.softmax(attn_weights, dtype=torch.float32, dim=-1).to(query.dtype)
attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
attn_output = (attn_probs @ value).reshape(batch_size, self.num_attention_heads, height, width, -1)
attn_output = attn_output.permute(0, 2, 3, 1, 4).reshape(batch_size, height, width, -1)
attn_output = self.proj(attn_output)
if output_attentions:
outputs = (attn_output, attn_weights)
else:
outputs = (attn_output, None)
return outputs
```
**الشرح:**
- **الإسقاطات المنفصلة:** يتم إزالة الإسقاط المُجمع `qkv`، وإنشاء إسقاطات خطية منفصلة `q` و `k` و `v`.
- **دالة استدعاء تحميل الأوزان:** تقوم طريقة `_split_qkv_load_hook` بتقسيم أوزان `qkv` المسبقة التدريب إلى أوزان `q` و `k` و `v` منفصلة عند تحميل النموذج. يضمن هذا التوافق مع أي نموذج مسبق التدريب.
- **التنفيذ الأمامي:** يتم حساب الاستعلامات والمفاتيح والقيم بشكل منفصل، وتستمر آلية الانتباه كالمعتاد.
#### **الخطوة 2: استبدال فئة الانتباه الأصلية**
استبدل فئة `SamVisionAttention` الأصلية بفئتك المخصصة بحيث يستخدم النموذج آلية الانتباه المعدلة.
```python
from transformers import SamModel
from transformers.models.sam import modeling_sam
# استبدال فئة الاهتمام في وحدة نمطية modeling_sam
modeling_sam.SamVisionAttention = SamVisionAttentionSplit
# تحميل نموذج SAM المسبق التدريب
model = SamModel.from_pretrained("facebook/sam-vit-base")
```
**الشرح:**
- **استبدال الفئة:** من خلال تعيين فئتك المخصصة إلى `modeling_sam.SamVisionAttention`، فإن أي حالات من فئة `SamVisionAttention` في النموذج ستستخدم النسخة المعدلة. وبالتالي، عند استدعاء `SamModel`، سيتم استخدام `SamVisionAttentionSplit` المحددة حديثًا.
- **تحميل النموذج:** يتم تحميل النموذج باستخدام `from_pretrained`، ويتم دمج آلية الانتباه المخصصة.
#### **الخطوة 3: تطبيق LoRA على إسقاطات محددة**
مع وجود إسقاطات `q` و `k` و `v` منفصلة، يمكنك الآن تطبيق LoRA على مكونات محددة، مثل إسقاطات `q` و `v`.
```python
from peft import LoraConfig, get_peft_model
config = LoraConfig(
r=16,
lora_alpha=32,
target_modules=["q", "v"], # تطبيق LoRA على إسقاطات q و v
lora_dropout=0.1,
task_type="mask-generation"
)
# تطبيق LoRA على النموذج
model = get_peft_model(model, config)
```
**الشرح:**
- **تكوين LoRA:** تحدد `LoraConfig` المرتبة `r`، وعامل القياس `lora_alpha`، والوحدات المستهدفة (`"q"` و `"v"`)، ومعدل التخلي، ونوع المهمة.
- **تطبيق LoRA:** تقوم دالة `get_peft_model` بتطبيق LoRA على الوحدات المحددة في النموذج.
- **تقليل المعلمات:** من خلال التركيز على `q` و `v`، فإنك تقلل عدد المعلمات القابلة للتدريب، مما يؤدي إلى تسريع التدريب وتقليل استخدام الذاكرة.
#### **الخطوة 4: التحقق من عدد المعلمات القابلة للتدريب**
من السهل التحقق من عدد المعلمات القابلة للتدريب ومعرفة تأثير تعديلك.
```python
model.print_trainable_parameters()
```
**الناتج المتوقع:**
```
عدد المعلمات القابلة للتدريب: 608,256 || جميع المعلمات: 94,343,728 || نسبة المعلمات القابلة للتدريب: 0.6447
عدد المعلمات القابلة للتدريب: 912,384 || جميع المعلمات: 94,647,856 || نسبة المعلمات القابلة للتدريب: 0.9640 # مع k
```
## المساهمة بابداعاتك الخاصة
يمكن لتعديل النماذج المسبقة التدريب أن يفتح آفاقًا جديدة للبحث والتطبيق. من خلال فهم وتعديل الآليات الداخلية للنماذج مثل SAM، يمكنك تخصيصها لتلبية احتياجاتك المحددة، وتحسين الأداء، وتجربة أفكار جديدة.
إذا قمت بتطوير تعديﻻتك الخاصة لنماذج Transformers وترغب في مشاركتها، ففكر في المساهمة في هذه الوثيقة.
- **إنشاء طلب سحب (Pull Request):** شارك تغييراتك وتحسيناتك في التعليمات البرمجية مباشرة في المستودع.
- **كتابة التوثيق:** قدم تفسيرات وأمثلة واضحة لتعديلاتك.
- **التفاعل مع المجتمع:** ناقش أفكارك واحصل على تعليقات من المطورين والباحثين الآخرين من خلال فتح مشكلة.

View File

@ -1,184 +0,0 @@
# المحولات النمطية
مكتبة `transformers` هي إطار عمل ذو فلسفة محدد؛ يتم تعريف فلسفتنا في [الدليل المفاهيمي](./philosophy).
جوهر هذه الفلسفة يتمثل في مبدأ [نموذج واحد، ملف واحد](https://huggingface.co/blog/transformers-design-philosophy)
في المكتبة. الجانب السلبي لهذا المكون هو تقييده لوراثة واستيراد مكونات الملفات.
نتيجة لذلك، تتكرر مكونات النموذج عبر العديد من الملفات. يحتوي `transformers` على عدد كبير من طبقات الانتباه، يقارب عدد النماذج، والكثير منها متطابق. يتسبب هذا في تباعد عمليات التنفيذ المستقلة مع تطبيق الإصلاحات والتغييرات.
على أجزاء محددة من التعليمات البرمجية.
ولمعالجة ذلك، اعتمدنا مفهوم "النسخ" في المكتبة. فبإضافة تعليق يُشير إلى أن التعليمات البرمجية هي نسخة من أخرى، نضمن من خلال أنظمة CI والأوامر المحلية عدم تباعد النسخ. لكن هذه العملية، رغم بساطتها، تُسبب إرهاقاً. كما أنها تزيد العبء على المساهمين، وهو ما نهدف إلى تجاوزه.
غالباً ما تتطلب مساهمات النماذج إضافة تعليمات برمجية (حوالي 1000 سطر)، ومعالج (حوالي 500 سطر)، واختبارات، ووثائق، إلخ. ونادراً ما تقل مساهمات النماذج عن 3000-5000 سطر من التعليمات البرمجية، معظمها أكواد نمطية. هذا يرفع مستوى المساهمات،
ونهدف مع المحولات النمطية إلى خفض هذا المستوى إلى حدّ مقبول.
## ما هو؟
تقدم المحولات النمطية مفهوم ملف "نمطي" لمجلد نموذج. يقبل هذا الملف النمطي تعليمات برمجية
غير مقبولة عادة في ملفات النمذجة/المعالجة، حيث يسمح بالاستيراد من نماذج مجاورة وكذلك
الوراثة من الفئات إلى فئات أخرى.
يعرّف هذا الملف النمطي النماذج والمعالجات وفئة التكوين التي سيتم تعريفها في وحداتهم
المتعلقة.
وأخيرًا، يقدم هذا الميزة أداة `linter` جديدة والتي ستعمل على "تفكيك" الملف النمطي إلى بنية "نموذج واحد، ملف واحد"
هيكل الدليل. سيتم إنشاء هذه الملفات تلقائيًا في كل مرة يتم فيها تشغيل البرنامج النصي؛ مما يقلل من المساهمات المطلوبة
إلى الملف النمطي، وبالتالي فقط إلى التغييرات بين النموذج المساهم والنماذج الأخرى.
سيقوم مستخدمو النموذج في النهاية باستيراد واستخدام واجهة الملف الواحد، لذا لا يتوقع حدوث أي تغيير هنا. من خلال القيام بذلك،
نأمل في الجمع بين أفضل ما في العالمين: تمكين المساهمات البسيطة مع الالتزام بفلسفتنا.
لذلك، هذا بديل لعلامات `# Copied from`، ويمكن توقع انتقال النماذج المساهمة سابقًا إلى
تنسيق المحولات النمطية الجديد في الأشهر المقبلة.
### التفاصيل
تُبسط أداة "linter" الوراثة، مُنشئةً جميع الملفات المفردة من الملف النمطي، مع الحفاظ على شفافيتها أمام مستخدمي Python. حاليًا، تُبسط الأداة مستوىً واحدًا من الوراثة
على سبيل المثال:
- إذا ورثت فئة التكوين من فئة أخرى وأضافت/حذفت معامل، فسيتم إما الإشارة إلى الملف المولد مباشرةً
(في حالة الإضافة) أو إزالته تمامًا (في حالة الحذف).
- إذا ورثت فئة من فئة أخرى، على سبيل المثال: `class GemmaModel(LlamaModel):`، تُستنتج التبعيات تلقائيًا
سيتم استنتاج جميع الوحدات الفرعية تلقائيًا من الفئة الأصلية.
- إذا قمت بتعريف وظائف جديدة في الملف `modular` واستخدمتها داخل الفئات، فستستنتج أداة linter ذلك تلقائيًا
يجب أن تكون قادرًا على كتابة كل شيء (المجزىء اللغوي، ومُعالِج الصور، والنموذج، والتكوين) في الملف `modular`، وسيتم إنشاء الملفات المُقابلة تلقائيًا.
### التطبيق
[TODO] نقدم اختبارًا جديدًا، للتأكد من أن المحتوى المولد يتطابق مع ما هو موجود في `modular_xxxx.py`
### الأمثلة
هنا مثال سريع باستخدام BERT و RoBERTa. النموذجان مرتبطان ارتباطًا وثيقًا: يختلف تنفيذهما النموذجي في طبقة تضمين.
بدلاً من إعادة تعريف النموذج بالكامل، إليك كيف يبدو ملف `modular_roberta.py` لفئات النمذجة والتكوين (لأغراض المثال، يتم تجاهل المجزىء اللغوي في هذا الوقت حيث أنه مختلف جدًا).
```python
from torch import nn
from ..bert.configuration_bert import BertConfig
from ..bert.modeling_bert import (
BertModel,
BertEmbeddings,
BertForMaskedLM
)
# تكوين RoBERTa مطابق لتكوين BERT
class RobertaConfig(BertConfig):
model_type = 'roberta'
# نعيد تعريف الإضافات هنا لتسليط الضوء على اختلاف معرف الحشو، ونعيد تعريف الإضافات الموضعية
class RobertaEmbeddings(BertEmbeddings):
def __init__(self, config):
super().__init__(config())
self.padding_idx = config.pad_token_id
self.position_embeddings = nn.Embedding(
config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
)
# نموذج RoBERTa مطابق لنموذج BERT، باستثناء طبقة الإضافات.
# نعيد تعريف الإضافات أعلاه، لذا هنا لا توجد حاجة لعمل إضافي
class RobertaModel(BertModel):
def __init__(self, config):
super().__init__(config)
self.embeddings = RobertaEmbeddings(config)
# الرؤوس الآن تحتاج فقط إلى إعادة تعريف النموذج داخل `RobertaModel` الصحيح
class RobertaForMaskedLM(BertForMaskedLM):
def __init__(self, config):
super().__init__(config)
self.model = RobertaModel(config)
```
لاحظ أنه إذا لم تستخدم الاعتماد الذي حددته، فستحصل على الخطأ التالي:
```bash
ValueError: You defined `RobertaEmbeddings` in the modular_roberta.py, it should be used
when you define `BertModel`, as it is one of it's direct dependencies. Make sure
you use it in the `__init__` function.
```
بالإضافة إلى ذلك، قد تجد قائمة بالأمثلة هنا:
## ما هو ليس كذلك
ليس بديلاً لتعليمات برمجة النمذجة (بعد؟)، وإذا لم يكن نموذجك يعتمد على أي شيء آخر موجود من قبل، فيمكنك إضافة ملف `نمذجة` كالعادة.
## الاستخدام المتقدم
### إزالة السمات والوظائف
لإزالة السمات التي لا تستخدم في نموذجك النمطي، والتي لا تريد رؤيتها في النمذجة المفككة:
```python
class GemmaModel(LlamaModel): | class GemmaModel(PreTrainedModel):
def __init__(self, config): | def __init__(self, config):
super().__init__(self, eos_token) | super().__init__(config)
del self.embed_tokens | self.padding_idx = config.pad_token_id
| self.vocab_size = config.vocab_size
|
| self.layers = nn.ModuleList(
| [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
| )
| self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
| self.rotary_emb = LlamaRotaryEmbedding(config=config)
| self.gradient_checkpointing = False
|
| # Initialize weights and apply final processing
| self.post_init()
```
إذا قمت بالتحقق من `LlamaModel` الأصلي، فستجد `embed_tokens` الذي تمت إزالته هنا (كما هو متوقع!)
إزالة وظيفة مشابهة، تحتاج فقط إلى كتابتها مع `raise ValueError("")` لمحاكاة السلوك الذي تريده فعليًا عند إزالة وظيفة أصلية في بايثون.
```python
class GemmaTokenizer(LlamaTokenizer):
...
def get_spm_processor(self):
raise AttributeError("Not needed for Gemma")
def unk_token_length(self):
raise AttributeError("Not needed for Gemma")
```
### تعريف وظائف جديدة
إذا قمت بتعريف وظيفة جديدة في الملف `modular` لاستخدامها داخل فئة، على سبيل المثال
```python
def my_new_function(*args, **kwargs):
# Do something here
pass
class GemmaModel(LlamaModel):
def forward(*args, **kwargs):
# Call the function
example = my_new_function(*args, **kwargs)
# continue here
```
سيتم نسخ وظيفة `my_new_function` (وبشكل متكرر، أي وظائف أخرى جديدة يتم استدعاؤها في جسمها) تلقائيًا
في الملف الذي يتم استخدامه.
### استدعاء `super()`
قمنا مؤخرًا بشحن بعض الميزات التي تسمح لك بالانتقال من:
```python
class GemmaTokenizer(LlamaTokenizer, PretrainedTokenizerFast): | class GemmaModel(nn.Module):
def __init__(self, eos_token="</s>"): | def __init__(self):
eos_token = AddedToken(eos_token) | eos_token = AddedToken(eos_token)
PretrainedTokenizerFast.__init__(self, eos_token) | super().__init__(eos_token)
```
هذا مفيد عندما لا تريد تفكيك استدعاء `super()`، وتريد التمييز بين أي استدعاء super init تقوم به!
### التسمية الخاصة
ندعم الآن أيضًا حالات خاصة مثل
```python
class GemmaVisionModel(CLIPModel):
pass
```
حيث اسم فئة `GemmaVision` الخاصة بك ليس هو نفسه `Gemma` النمطي. هذا مفيد للغاية للنماذج المركبة.

View File

@ -130,6 +130,7 @@
| دفتر الملاحظات | الوصف | | |
|:----------|:-------------|:-------------|------:|
| [كيفية تكميم نموذج باستخدام ONNX Runtime لتصنيف النص](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)| يوضح كيفية تطبيق التكميم الثابت والديناميكي على نموذج باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime) لأي مهمة GLUE. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)|
| [كيفية تكميم نموذج باستخدام Intel Neural Compressor لتصنيف النص](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)| يوضح كيفية تطبيق التكميم الثابت والديناميكي والتدريبي على نموذج باستخدام [Intel Neural Compressor (INC)](https://github.com/intel/neural-compressor) لأي مهمة GLUE. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)|
| [كيفية ضبط نموذج بدقة على تصنيف النص باستخدام ONNX Runtime](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج بدقة على أي مهمة GLUE باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)|
| [كيفية ضبط نموذج بدقة على التلخيص باستخدام ONNX Runtime](https://github.com/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج بدقة على XSUM باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)|

View File

@ -347,8 +347,8 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
```py
>>> from transformers import AutoModel
>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
>>> pt_model = AutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
>>> pt_model = AutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
```
</pt>
<tf>
@ -356,8 +356,8 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
```py
>>> from transformers import TFAutoModel
>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
```
</tf>
</frameworkcontent>

View File

@ -1,422 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# نمذجة اللغة السببية (Causal language modeling)
[[open-in-colab]]
هناك نوعان من نمذجة اللغة، السببية والمقنعة. يوضح هذا الدليل نمذجة اللغة السببية.
تُستخدم نماذج اللغة السببية غالبًا لتوليد النص. يمكنك استخدام هذه النماذج للتطبيقات الإبداعية مثل
اختيار مغامرة النص الخاصة بك أو مساعد ترميز ذكي مثل Copilot أو CodeParrot.
<Youtube id="Vpjb1lu0MDk"/>
تتنبأ نمذجة اللغة السببية بالرمز التالي في تسلسل من الرموز، ولا يمكن للنموذج سوى الاهتمام بالرموز على
اليسار. هذا يعني أن النموذج لا يمكنه رؤية الرموز المستقبلية. GPT-2 هو مثال على نموذج اللغة السببية.
سيوضح لك هذا الدليل كيفية:
1. ضبط دقيق [DistilRoBERTa](https://huggingface.co/distilbert/distilroberta-base) على مجموعة فرعية [r/askscience](https://www.reddit.com/r/askscience/) من مجموعة بيانات [ELI5](https://huggingface.co/datasets/eli5).
2. استخدام النموذج المدرب الخاص بك للاستنتاج.
<Tip>
لرؤية جميع العمارات ونقاط التحقق المتوافقة مع هذه المهمة، نوصي بالتحقق من [task-page](https://huggingface.co/tasks/text-generation)
</Tip>
قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate
```
نحن نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عند المطالبة، أدخل رمزك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات ELI5
ابدأ بتحميل أول 5000 مثال من [ELI5-Category](https://huggingface.co/datasets/eli5_category) مجموعة البيانات مع مكتبة 🤗 Datasets. سيعطيك هذا فرصة للتجربة والتأكد من أن كل شيء يعمل قبل قضاء المزيد من الوقت في التدريب على مجموعة البيانات الكاملة.
```py
>>> from datasets import load_dataset
>>> eli5 = load_dataset("eli5_category", split="train[:5000]")
```
قم بتقسيم مجموعة بيانات `train` إلى مجموعتي تدريب واختبار باستخدام الخاصية [`~datasets.Dataset.train_test_split`]:
```py
>>> eli5 = eli5.train_test_split(test_size=0.2)
```
ثم ألق نظرة على مثال:
```py
>>> eli5["train"][0]
{'q_id': '7h191n',
'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
'selftext': '',
'category': 'Economics',
'subreddit': 'explainlikeimfive',
'answers': {'a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
'text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
'score': [21, 19, 5, 3],
'text_urls': [[],
[],
[],
['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']]},
'title_urls': ['url'],
'selftext_urls': ['url']}
```
على الرغم من أن هذا قد يبدو معقدًا، إلا أنك مهتم حقًا بحقل `text`. ما هو رائع حول مهام نمذجة اللغة
أنت لا تحتاج إلى تسميات (تُعرف أيضًا باسم المهمة غير الخاضعة للإشراف) لأن الكلمة التالية تعمل كتسمية.
## معالجة مسبقة (Preprocess)
<Youtube id="ma1TrR7gE7I"/>
الخطوة التالية هي تحميل مجزء النص DistilGPT2 لمعالجة حقل `text` الفرعي:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
```
ستلاحظ من المثال أعلاه، الحقل `text` هو في الواقع متداخل داخل `answers`. هذا يعني أنك ستحتاج إلى
استخراج حقل `text` الفرعي من بنيته المتداخلة باستخدام الدالة [`flatten`](https://huggingface.co/docs/datasets/process#flatten):
```py
>>> eli5 = eli5.flatten()
>>> eli5["train"][0]
{'q_id': '7h191n',
'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
'selftext': '',
'category': 'Economics',
'subreddit': 'explainlikeimfive',
'answers.a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
'answers.text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
'answers.score': [21, 19, 5, 3],
'answers.text_urls': [[],
[],
[],
['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']],
'title_urls': ['url'],
'selftext_urls': ['url']}
```
كل حقل فرعي هو الآن عموداً منفصلاً مسبوقاً بـ `answers`، وحقل `text` هو قائمة الآن. بدلاً من ذلك
من تجزائة نص كل جملة بشكل منفصل، قم بتحويل القائمة إلى سلسلة حتى تتمكن من تجزئة نصها بشكل مجمّع.
هنا أول دالة معالجة مسبقة لدمج قائمة السلاسل لكل مثال ومجزىء النتيجة:
```py
>>> def preprocess_function(examples):
... return tokenizer([" ".join(x) for x in examples["answers.text"]])
```
لتطبيق دالة المعالجة المسبقة هذه على مجموعة البيانات بأكملها، استخدم الدالة 🤗 Datasets [`~datasets.Dataset.map`]. يمكنك تسريع هذه العملية `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد، وزيادة عدد العمليات مع `num_proc`. احذف أي أعمدة لا تحتاجها:
```py
>>> tokenized_eli5 = eli5.map(
... preprocess_function,
... batched=True,
... num_proc=4,
... remove_columns=eli5["train"].column_names,
... )
```
تحتوي هذه المجموعة من البيانات على تسلسلات الرموز، ولكن بعضها أطول من الطول الأقصى للمدخلات للنموذج.
يمكنك الآن استخدام دالة ما قبل المعالجة ثانية لـ:
- تجميع كل التسلسلات.
- تقسيم التسلسلات المجمّعة إلى أجزاء أقصر محددة، بحجم `block_size`، والتي يجب أن تكون أقصر من الطول الأقصى للمدخلات ومناسبة لذاكرة GPU.
```py
>>> block_size = 128
>>> def group_texts(examples):
... # ربط جميع النصوص.
... concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
... total_length = len(concatenated_examples[list(examples.keys())[0]])
... # نتجاهل الباقي الصغير، يمكننا إضافة الحشو إذا كان النموذج يدعمه بدلاً من هذا الإسقاط، يمكنك
... # تخصيص هذا الجزء حسب احتياجاتك.
... if total_length >= block_size:
... total_length = (total_length // block_size) * block_size
... # التقسيم إلى أجزاء بحجم block_size.
... result = {
... k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
... for k, t in concatenated_examples.items()
... }
... result["labels"] = result["input_ids"].copy()
... return result
```
طبق دالة `group_texts` على كامل المجموعة من البيانات:
```py
>>> lm_dataset = tokenized_eli5.map(group_texts, batched=True, num_proc=4)
```
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorForLanguageModeling`]. من الأفضل أن تقوم بـ *الحشو الديناميكي* للجمل إلى الطول الأطول في الدفعة أثناء التجميع، بدلاً من حشو كامل المجموعة من البيانات إلى الطول الأقصى.
<frameworkcontent>
<pt>
استخدم رمز نهاية التسلسل كرمز للحشو، وحدد `mlm_probability` لحجب الرموز بشكل عشوائي عند كل تكرار للبيانات:
```py
>>> from transformers import DataCollatorForLanguageModeling
>>> tokenizer.pad_token = tokenizer.eos_token
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
```
</pt>
<tf>
استخدم رمز نهاية التسلسل كرمز للحشو، وحدد `mlm_probability` لحجب الرموز بشكل عشوائي عند كل تكرار للبيانات:
```py
>>> from transformers import DataCollatorForLanguageModeling
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن على دراية بتدريب نموذج باستخدام [`Trainer`], اطلع على [البرنامج التعليمي الأساسي](../training#train-with-pytorch-trainer)!
</Tip>
أنت جاهز الآن لبدء تدريب نموذجك! قم بتحميل DistilGPT2 باستخدام [`AutoModelForCausalLM`]:
```py
>>> from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
>>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
```
في هذه المرحلة، تبقى ثلاث خطوات فقط:
1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعامل الوحيد المطلوب هو `output_dir` الذي يحدد أين سيتم حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub بتحديد `push_to_hub=True` (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك).
2. قم بتمرير معاملات التدريب إلى [`Trainer`] إلى جانب النموذج، والمجموعات من البيانات، ومجمّع البيانات.
3. قم باستدعاء [`~Trainer.train`] لتدريب نموذجك.
```py
>>> training_args = TrainingArguments(
... output_dir="my_awesome_eli5_clm-model",
... eval_strategy="epoch",
... learning_rate=2e-5,
... weight_decay=0.01,
... push_to_hub=True,
... )
>>> trainer = Trainer(
... model=model,
... args=training_args,
... train_dataset=lm_dataset["train"],
... eval_dataset=lm_dataset["test"],
... data_collator=data_collator,
... tokenizer=tokenizer,
... )
>>> trainer.train()
```
بمجرد اكتمال التدريب، استخدم طريقة [`~transformers.Trainer.evaluate`] لتقييم نموذجك والحصول على احتمالية الارتباك:
```py
>>> import math
>>> eval_results = trainer.evaluate()
>>> print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")
Perplexity: 49.61
```
ثم شارك نموذجك على Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن على دراية بتدريب نموذج باستخدام Keras، اطلع على [البرنامج التعليمي الأساسي](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لتدريب نموذج في TensorFlow، ابدأ بإعداد دالة المحسن، وجدول معدل التعلم، وبعض معاملات التدريب:
```py
>>> from transformers import create_optimizer, AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
ثم يمكنك تحميل DistilGPT2 باستخدام [`TFAutoModelForCausalLM`]:
```py
>>> from transformers import TFAutoModelForCausalLM
>>> model = TFAutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
```
حول مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... lm_dataset["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... lm_dataset["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers لديها دالة خسارة ذات صلة بالمهمة الافتراضية، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # لا يوجد حجة للخسارة!
```
يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومجمّع البيانات في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> callback = PushToHubCallback(
... output_dir="my_awesome_eli5_clm-model",
... tokenizer=tokenizer,
... )
```
أخيراً، أنت جاهز لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة، وعدد العصور، والتعليقات الخاصة بك لتدريب النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=[callback])
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
للحصول على مثال أكثر تعمقًا حول كيفية تدريب نموذج للنمذجة اللغوية السببية، اطلع على الدفتر المقابل
[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)
أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling-tf.ipynb).
</Tip>
## الاستدلال (Inference)
رائع، الآن بعد أن قمت بتدريب نموذج، يمكنك استخدامه للاستدلال!
قم بابتكار سؤال تود توليد نص منه:
```py
>>> prompt = "Somatic hypermutation allows the immune system to"
```
أبسط طريقة لتجربة نموذجك المدرب للاستدلال هي استخدامه في [`pipeline`]. قم بتنفيذ `pipeline` لتوليد النص مع نموذجك، ومرر نصك إليه:
```py
>>> from transformers import pipeline
>>> generator = pipeline("text-generation", model="username/my_awesome_eli5_clm-model")
>>> generator(prompt)
[{'generated_text': "Somatic hypermutation allows the immune system to be able to effectively reverse the damage caused by an infection.\n\n\nThe damage caused by an infection is caused by the immune system's ability to perform its own self-correcting tasks."}]
```
<frameworkcontent>
<pt>
قسم النص وإرجع `input_ids` كتنسورات PyTorch:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_clm-model")
>>> inputs = tokenizer(prompt, return_tensors="pt").input_ids
```
استخدم طريقة [`~generation.GenerationMixin.generate`] لتوليد النص.
للمزيد من التفاصيل حول استراتيجيات توليد النص المختلفة والبارامترات للتحكم في التوليد، راجع صفحة [استراتيجيات توليد النص](../generation_strategies).
```py
>>> from transformers import AutoModelForCausalLM
>>> model = AutoModelForCausalLM.from_pretrained("username/my_awesome_eli5_clm-model")
>>> outputs = model.generate(inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
```
فك ترميز الرموز المولدة مرة أخرى إلى نص:
```py
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
["Somatic hypermutation allows the immune system to react to drugs with the ability to adapt to a different environmental situation. In other words, a system of 'hypermutation' can help the immune system to adapt to a different environmental situation or in some cases even a single life. In contrast, researchers at the University of Massachusetts-Boston have found that 'hypermutation' is much stronger in mice than in humans but can be found in humans, and that it's not completely unknown to the immune system. A study on how the immune system"]
```
</pt>
<tf>
قم بتقسيم النص وإرجاع `input_ids` كـ TensorFlow tensors:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_clm-model")
>>> inputs = tokenizer(prompt, return_tensors="tf").input_ids
```
استخدم طريقة [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] لإنشاء الملخص. للمزيد من التفاصيل حول استراتيجيات توليد النص المختلفة والبارامترات للتحكم في التوليد، راجع صفحة [استراتيجيات توليد النص](../generation_strategies).
```py
>>> from transformers import TFAutoModelForCausalLM
>>> model = TFAutoModelForCausalLM.from_pretrained("username/my_awesome_eli5_clm-model")
>>> outputs = model.generate(input_ids=inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
```
فك ترميز الرموز المولدة مرة أخرى إلى نص:
```py
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Somatic hypermutation allows the immune system to detect the presence of other viruses as they become more prevalent. Therefore, researchers have identified a high proportion of human viruses. The proportion of virus-associated viruses in our study increases with age. Therefore, we propose a simple algorithm to detect the presence of these new viruses in our samples as a sign of improved immunity. A first study based on this algorithm, which will be published in Science on Friday, aims to show that this finding could translate into the development of a better vaccine that is more effective for']
```
</tf>
</frameworkcontent>

View File

@ -1,442 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# نمذجة اللغة المقنعة (Masked language modeling)
[[open-in-colab]]
<Youtube id="mqElG5QJWUg"/>
تتنبأ نمذجة اللغة المقنعة برمز مقنع في تسلسل، ويمكن للنموذج الانتباه إلى الرموز بشكل ثنائي الاتجاه. هذا
يعني أن النموذج لديه إمكانية الوصول الكاملة إلى الرموز الموجودة على اليسار واليمين. تعد نمذجة اللغة المقنعة ممتازة للمهام التي
تتطلب فهمًا سياقيًا جيدًا لتسلسل كامل. BERT هو مثال على نموذج لغة مقنع.
سيوضح لك هذا الدليل كيفية:
1. تكييف [DistilRoBERTa](https://huggingface.co/distilbert/distilroberta-base) على مجموعة فرعية [r/askscience](https://www.reddit.com/r/askscience/) من مجموعة بيانات [ELI5](https://huggingface.co/datasets/eli5).
2. استخدام نموذج المدرب الخاص بك للاستدلال.
<Tip>
لمعرفة جميع البنى والنسخ المتوافقة مع هذه المهمة، نوصي بالتحقق من [صفحة المهمة](https://huggingface.co/tasks/fill-mask)
</Tip>
قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate
```
نحن نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عندما تتم مطالبتك، أدخل رمزك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات ELI5
ابدأ بتحميل أول 5000 مثال من مجموعة بيانات [ELI5-Category](https://huggingface.co/datasets/eli5_category) باستخدام مكتبة 🤗 Datasets. سيعطيك هذا فرصة للتجربة والتأكد من أن كل شيء يعمل قبل قضاء المزيد من الوقت في التدريب على مجموعة البيانات الكاملة.
```py
>>> from datasets import load_dataset
>>> eli5 = load_dataset("eli5_category", split="train[:5000]")
```
قم بتقسيم مجموعة البيانات `train` إلى مجموعتي تدريب واختبار باستخدام الدالة [`~datasets.Dataset.train_test_split`]:
```py
>>> eli5 = eli5.train_test_split(test_size=0.2)
```
ثم ألق نظرة على مثال:
```py
>>> eli5["train"][0]
{'q_id': '7h191n',
'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
'selftext': '',
'category': 'Economics',
'subreddit': 'explainlikeimfive',
'answers': {'a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
'text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
'score': [21, 19, 5, 3],
'text_urls': [[],
[],
[],
['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']]},
'title_urls': ['url'],
'selftext_urls': ['url']}
```
على الرغم من أن هذا قد يبدو كثيرًا، إلا أنك مهتم حقًا بحقل `text`. ما هو رائع حول مهام نمذجة اللغة هو أنك لا تحتاج إلى تسميات (تُعرف أيضًا باسم المهمة غير الخاضعة للإشراف) لأن الكلمة التالية *هي* التسمية.
## معالجة مسبقة (Preprocess)
<Youtube id="8PmhEIXhBvI"/>
بالنسبة لنمذجة اللغة المقنعة، فإن الخطوة التالية هي تحميل معالج DistilRoBERTa لمعالجة حقل `text` الفرعي:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilroberta-base")
```
ستلاحظ من المثال أعلاه، أن حقل `text` موجود بالفعل داخل `answers`. هذا يعني أنك ستحتاج إلى استخراج حقل `text` الفرعي من بنيته المضمنة باستخدام الدالة [`flatten`](https://huggingface.co/docs/datasets/process#flatten):
```py
>>> eli5 = eli5.flatten()
>>> eli5["train"][0]
{'q_id': '7h191n',
'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
'selftext': '',
'category': 'Economics',
'subreddit': 'explainlikeimfive',
'answers.a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
'answers.text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
'answers.score': [21, 19, 5, 3],
'answers.text_urls': [[],
[],
[],
['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']],
'title_urls': ['url'],
'selftext_urls': ['url']}
```
كل حقل فرعي هو الآن عمود منفصل كما هو موضح بواسطة بادئة `answers`، وحقل `text` هو قائمة الآن. بدلاً من
معالجة كل جملة بشكل منفصل، قم بتحويل القائمة إلى سلسلة حتى تتمكن من معالجتها بشكل مشترك.
هنا أول دالة معالجة مسبقة لربط قائمة السلاسل لكل مثال ومعالجة النتيجة:
```py
>>> def preprocess_function(examples):
... return tokenizer([" ".join(x) for x in examples["answers.text"]])
```
لتطبيق دالة المعالجة المسبقة على مجموعة البيانات بأكملها، استخدم الدالة 🤗 Datasets [`~datasets.Dataset.map`]. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عدة عناصر في وقت واحد، وزيادة عدد العمليات باستخدام `num_proc`. احذف أي أعمدة غير ضرورية:
```py
>>> tokenized_eli5 = eli5.map(
... preprocess_function,
... batched=True,
... num_proc=4,
... remove_columns=eli5["train"].column_names,
... )
```
تحتوي مجموعة البيانات هذه على تسلسلات رمزية، ولكن بعضها أطول من الطول الأقصى للمدخلات للنموذج.
يمكنك الآن استخدام دالة معالجة مسبقة ثانية لـ:
- تجميع جميع التسلسلات
- تقسيم التسلسلات المجمّعة إلى أجزاء أقصر محددة بـ `block_size`، والتي يجب أن تكون أقصر من الحد الأقصى لطول المدخلات ومناسبة لذاكرة GPU.
```py
>>> block_size = 128
>>> def group_texts(examples):
... # تجميع جميع النصوص.
... concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
... total_length = len(concatenated_examples[list(examples.keys())[0]])
... # نتجاهل الجزء المتبقي الصغير، يمكننا إضافة الحشو إذا كان النموذج يدعمه بدلاً من هذا الإسقاط، يمكنك
... # تخصيص هذا الجزء حسب احتياجاتك.
... if total_length >= block_size:
... total_length = (total_length // block_size) * block_size
... # تقسيمها إلى أجزاء بحجم block_size.
... result = {
... k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
... for k, t in concatenated_examples.items()
... }
... return result
```
طبق دالة `group_texts` على مجموعة البيانات بأكملها:
```py
>>> lm_dataset = tokenized_eli5.map(group_texts, batched=True, num_proc=4)
```
الآن، قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorForLanguageModeling`]. من الأكثر كفاءة أن تقوم بـ *الحشو الديناميكي* ليصل طولها إلى أطول جملة في الدفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الطول الأقصى.
<frameworkcontent>
<pt>
استخدم رمز نهاية التسلسل كرمز الحشو وحدد `mlm_probability` لحجب الرموز عشوائياً كل مرة تكرر فيها البيانات:
```py
>>> from transformers import DataCollatorForLanguageModeling
>>> tokenizer.pad_token = tokenizer.eos_token
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)
```
</pt>
<tf>
استخدم رمز نهاية التسلسل كرمز الحشو وحدد `mlm_probability` لحجب الرموز عشوائياً كل مرة تكرر فيها البيانات:
```py
>>> from transformers import DataCollatorForLanguageModeling
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن على دراية بتعديل نموذج باستخدام [`Trainer`], ألق نظرة على الدليل الأساسي [هنا](../training#train-with-pytorch-trainer)!
</Tip>
أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilRoBERTa باستخدام [`AutoModelForMaskedLM`]:
```py
>>> from transformers import AutoModelForMaskedLM
>>> model = AutoModelForMaskedLM.from_pretrained("distilbert/distilroberta-base")
```
في هذه المرحلة، تبقى ثلاث خطوات فقط:
1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعلمة الوحيدة المطلوبة هي `output_dir` والتي تحدد مكان حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك).
2. قم بتمرير معلمات التدريب إلى [`Trainer`] مع النموذج، ومجموعات البيانات، ومجمّع البيانات.
3. قم باستدعاء [`~Trainer.train`] لتعديل نموذجك.
```py
>>> training_args = TrainingArguments(
... output_dir="my_awesome_eli5_mlm_model",
... eval_strategy="epoch",
... learning_rate=2e-5,
... num_train_epochs=3,
... weight_decay=0.01,
... push_to_hub=True,
... )
>>> trainer = Trainer(
... model=model,
... args=training_args,
... train_dataset=lm_dataset["train"],
... eval_dataset=lm_dataset["test"],
... data_collator=data_collator,
... tokenizer=tokenizer,
... )
>>> trainer.train()
```
بمجرد اكتمال التدريب، استخدم طريقة [`~transformers.Trainer.evaluate`] لتقييم النموذج والحصول على مقياس
الحيرة:
```py
>>> import math
>>> eval_results = trainer.evaluate()
>>> print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")
Perplexity: 8.76
```
ثم شارك نموذجك على Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن على دراية بتعديل نموذج باستخدام Keras، ألق نظرة على الدليل الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لتعديل نموذج في TensorFlow، ابدأ بإعداد دالة محسن، وجدول معدل التعلم، وبعض معلمات التدريب:
```py
>>> from transformers import create_optimizer, AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
ثم يمكنك تحميل DistilRoBERTa باستخدام [`TFAutoModelForMaskedLM`]:
```py
>>> from transformers import TFAutoModelForMaskedLM
>>> model = TFAutoModelForMaskedLM.from_pretrained("distilbert/distilroberta-base")
```
قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... lm_dataset["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... lm_dataset["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن نماذج Transformers لديها جميعها دالة خسارة افتراضية ذات صلة بالمهمة، لذلك لا تحتاج إلى تحديد واحدة ما لم تكن تريد ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # لا توجد حجة للخسارة!
```
يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومعالج الرموز في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> callback = PushToHubCallback(
... output_dir="my_awesome_eli5_mlm_model",
... tokenizer=tokenizer,
... )
```
أخيراً، أنت مستعد لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق، وعدد العصور، والتعليقات الخاصة بك لتعديل النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=[callback])
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائياً إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
لمثال أكثر تفصيلاً حول كيفية تعديل نموذج للنمذجة اللغوية المقنعة، ألق نظرة على الدفتر المقابل
[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)
أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling-tf.ipynb).
</Tip>
## الاستدلال
رائع، الآن بعد أن قمت بتعديل نموذج، يمكنك استخدامه للاستدلال!
جهّز بعض النصوص التي تريد أن يملأ النموذج الفراغات فيها، واستخدم الرمز الخاص `<mask>` للإشارة إلى الفراغ:
```py
>>> text = "The Milky Way is a <mask> galaxy."
```
أبسط طريقة لتجربة نموذجك المعدل للاستدلال هي استخدامه في [`pipeline`]. قم بإنشاء كائن `pipeline` لملء الفراغ مع نموذجك، ومرر نصك إليه. إذا أردت، يمكنك استخدام معلمة `top_k` لتحديد عدد التنبؤات التي تريد إرجاعها:
```py
>>> from transformers import pipeline
>>> mask_filler = pipeline("fill-mask", "username/my_awesome_eli5_mlm_model")
>>> mask_filler(text, top_k=3)
[{'score': 0.5150994658470154,
'token': 21300,
'token_str': ' spiral',
'sequence': 'The Milky Way is a spiral galaxy.'},
{'score': 0.07087188959121704,
'token': 2232,
'token_str': ' massive',
'sequence': 'The Milky Way is a massive galaxy.'},
{'score': 0.06434620916843414,
'token': 650,
'token_str': ' small',
'sequence': 'The Milky Way is a small galaxy.'}]
```
<frameworkcontent>
<pt>
قم بتجزئة النص وإرجاع `input_ids` كمتجهات PyTorch. ستحتاج أيضًا إلى تحديد موضع رمز `<mask>`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_mlm_model")
>>> inputs = tokenizer(text, return_tensors="pt")
>>> mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
```
قم بتمرير المدخلات إلى النموذج وإرجاع `logits` للرمز المقنع:
```py
>>> from transformers import AutoModelForMaskedLM
>>> model = AutoModelForMaskedLM.from_pretrained("username/my_awesome_eli5_mlm_model")
>>> logits = model(**inputs).logits
>>> mask_token_logits = logits[0, mask_token_index, :]
```
ثم قم بإرجاع الرموز الثلاثة المقنعة ذات الاحتمالية الأعلى وطباعتها:
```py
>>> top_3_tokens = torch.topk(mask_token_logits, 3, dim=1).indices[0].tolist()
>>> for token in top_3_tokens:
... print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))
The Milky Way is a spiral galaxy.
The Milky Way is a massive galaxy.
The Milky Way is a small galaxy.
```
</pt>
<tf>
قم بتقسيم النص إلى رموز وإرجاع `input_ids` كـ TensorFlow tensors. ستحتاج أيضًا إلى تحديد موضع رمز `<mask>`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_mlm_model")
>>> inputs = tokenizer(text, return_tensors="tf")
>>> mask_token_index = tf.where(inputs["input_ids"] == tokenizer.mask_token_id)[0, 1]
```
قم بتمرير المدخلات إلى النموذج وإرجاع `logits` للرمز المقنع:
```py
>>> from transformers import TFAutoModelForMaskedLM
>>> model = TFAutoModelForMaskedLM.from_pretrained("username/my_awesome_eli5_mlm_model")
>>> logits = model(**inputs).logits
>>> mask_token_logits = logits[0, mask_token_index, :]
```
ثم قم بإرجاع الرموز الثلاثة المقنعة ذات الاحتمالية الأعلى وطباعتها:
```py
>>> top_3_tokens = tf.math.top_k(mask_token_logits, 3).indices.numpy()
>>> for token in top_3_tokens:
... print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))
The Milky Way is a spiral galaxy.
The Milky Way is a massive galaxy.
The Milky Way is a small galaxy.
```
</tf>
</frameworkcontent>

View File

@ -1,452 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# الاختيار من متعدد (Multiple choice)
[[open-in-colab]]
مهمة الاختيار من متعدد مشابهة لمهمة الإجابة على الأسئلة، ولكن مع توفير عدة إجابات محتملة مع سياق، ويُدرّب النموذج على تحديد الإجابة الصحيحة.
سيوضح لك هذا الدليل كيفية:
1. ضبط نموذج [BERT](https://huggingface.co/google-bert/bert-base-uncased) باستخدام الإعداد `regular` لمجموعة بيانات [SWAG](https://huggingface.co/datasets/swag) لاختيار الإجابة الأفضل من بين الخيارات المتعددة المتاحة مع السياق.
2. استخدام النموذج المضبوط للاستدلال.
قبل البدء، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate
```
نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل نموذجك ومشاركته مع المجتمع. عند المطالبة، أدخل الرمز المميز الخاص بك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات SWAG
ابدأ بتحميل تهيئة `regular` لمجموعة بيانات SWAG من مكتبة 🤗 Datasets:
```py
>>> from datasets import load_dataset
>>> swag = load_dataset("swag", "regular")
```
ثم ألق نظرة على مثال:
```py
>>> swag["train"][0]
{'ending0': 'passes by walking down the street playing their instruments.',
'ending1': 'has heard approaching them.',
'ending2': "arrives and they're outside dancing and asleep.",
'ending3': 'turns the lead singer watches the performance.',
'fold-ind': '3416',
'gold-source': 'gold',
'label': 0,
'sent1': 'Members of the procession walk down the street holding small horn brass instruments.',
'sent2': 'A drum line',
'startphrase': 'Members of the procession walk down the street holding small horn brass instruments. A drum line',
'video-id': 'anetv_jkn6uvmqwh4'}
```
على الرغم من أن الحقول تبدو كثيرة، إلا أنها في الواقع بسيطة جداً:
- `sent1` و `sent2`: يعرض هذان الحقلان بداية الجملة، وبدمجهما معًا، نحصل على حقل `startphrase`.
- `ending`: يقترح نهاية محتملة للجملة، واحدة منها فقط هي الصحيحة.
- `label`: يحدد نهاية الجملة الصحيحة.
## المعالجة المسبقة (Preprocess)
الخطوة التالية هي استدعاء مُجزئ BERT لمعالجة بدايات الجمل والنهايات الأربع المحتملة:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
```
تحتاج دالة المعالجة المسبقة التي تريد إنشاءها إلى:
1. إنشاء أربع نسخ من حقل `sent1` ودمج كل منها مع `sent2` لإعادة إنشاء كيفية بدء الجملة.
2. دمج `sent2` مع كل من نهايات الجمل الأربع المحتملة.
3. تتجميع هاتين القائمتين لتتمكن من تجزئتهما، ثم إعادة ترتيبها بعد ذلك بحيث يكون لكل مثال حقول `input_ids` و `attention_mask` و `labels` مقابلة.
```py
>>> ending_names = ["ending0", "ending1", "ending2", "ending3"]
>>> def preprocess_function(examples):
... first_sentences = [[context] * 4 for context in examples["sent1"]]
... question_headers = examples["sent2"]
... second_sentences = [
... [f"{header} {examples[end][i]}" for end in ending_names] for i, header in enumerate(question_headers)
... ]
... first_sentences = sum(first_sentences, [])
... second_sentences = sum(second_sentences, [])
... tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
... return {k: [v[i : i + 4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()}
```
لتطبيق دالة المعالجة المسبقة على مجموعة البيانات بأكملها، استخدم طريقة [`~datasets.Dataset.map`] الخاصة بـ 🤗 Datasets. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:
```py
tokenized_swag = swag.map(preprocess_function, batched=True)
```
لا يحتوي 🤗 Transformers على مجمع بيانات للاختيار من متعدد، لذلك ستحتاج إلى تكييف [`DataCollatorWithPadding`] لإنشاء دفعة من الأمثلة. من الأكفأ إضافة حشو (padding) ديناميكي للجمل إلى أطول طول في دفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الحد الأقصى للطول.
يقوم `DataCollatorForMultipleChoice` بتجميع جميع مدخلات النموذج، ويطبق الحشو، ثم يعيد تجميع النتائج في شكلها الأصلي:
<frameworkcontent>
<pt>
```py
>>> from dataclasses import dataclass
>>> from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
>>> from typing import Optional, Union
>>> import torch
>>> @dataclass
... class DataCollatorForMultipleChoice:
... """
... Data collator that will dynamically pad the inputs for multiple choice received.
... """
... tokenizer: PreTrainedTokenizerBase
... padding: Union[bool, str, PaddingStrategy] = True
... max_length: Optional[int] = None
... pad_to_multiple_of: Optional[int] = None
... def __call__(self, features):
... label_name = "label" if "label" in features[0].keys() else "labels"
... labels = [feature.pop(label_name) for feature in features]
... batch_size = len(features)
... num_choices = len(features[0]["input_ids"])
... flattened_features = [
... [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
... ]
... flattened_features = sum(flattened_features, [])
... batch = self.tokenizer.pad(
... flattened_features,
... padding=self.padding,
... max_length=self.max_length,
... pad_to_multiple_of=self.pad_to_multiple_of,
... return_tensors="pt",
... )
... batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
... batch["labels"] = torch.tensor(labels, dtype=torch.int64)
... return batch
```
</pt>
<tf>
```py
>>> from dataclasses import dataclass
>>> from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
>>> from typing import Optional, Union
>>> import tensorflow as tf
>>> @dataclass
... class DataCollatorForMultipleChoice:
... """
... Data collator that will dynamically pad the inputs for multiple choice received.
... """
... tokenizer: PreTrainedTokenizerBase
... padding: Union[bool, str, PaddingStrategy] = True
... max_length: Optional[int] = None
... pad_to_multiple_of: Optional[int] = None
... def __call__(self, features):
... label_name = "label" if "label" in features[0].keys() else "labels"
... labels = [feature.pop(label_name) for feature in features]
... batch_size = len(features)
... num_choices = len(features[0]["input_ids"])
... flattened_features = [
... [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
... ]
... flattened_features = sum(flattened_features, [])
... batch = self.tokenizer.pad(
... flattened_features,
... padding=self.padding,
... max_length=self.max_length,
... pad_to_multiple_of=self.pad_to_multiple_of,
... return_tensors="tf",
... )
... batch = {k: tf.reshape(v, (batch_size, num_choices, -1)) for k, v in batch.items()}
... batch["labels"] = tf.convert_to_tensor(labels, dtype=tf.int64)
... return batch
```
</tf>
</frameworkcontent>
## التقييم (Evaluate)
يُفضل غالبًا تضمين مقياس أثناء التدريب لتقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة باستخدام مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index). لهذه المهمة، قم بتحميل مقياس [الدقة](https://huggingface.co/spaces/evaluate-metric/accuracy) (انظر إلى [الجولة السريعة](https://huggingface.co/docs/evaluate/a_quick_tour) لـ 🤗 Evaluate لمعرفة المزيد حول كيفية تحميل المقياس وحسابه):
```py
>>> import evaluate
>>> accuracy = evaluate.load("accuracy")
```
ثم أنشئ دالة لتمرير التنبؤات والتسميات إلى [`~evaluate.EvaluationModule.compute`] لحساب الدقة:
```py
>>> import numpy as np
>>> def compute_metrics(eval_pred):
... predictions, labels = eval_pred
... predictions = np.argmax(predictions, axis=1)
... return accuracy.compute(predictions=predictions, references=labels)
```
دالتك `compute_metrics` جاهزة الآن، وستعود إليها عند إعداد تدريبك.
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام [`Trainer`], فراجع الدرس الأساسي [هنا](../training#train-with-pytorch-trainer)!
</Tip>
أنت جاهز لبدء تدريب نموذجك الآن! قم بتحميل BERT باستخدام [`AutoModelForMultipleChoice`]:
```py
>>> from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
>>> model = AutoModelForMultipleChoice.from_pretrained("google-bert/bert-base-uncased")
```
في هذه المرحلة، تبقى ثلاث خطوات فقط:
1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعلمة الوحيدة المطلوبة هي `output_dir` التي تحدد مكان حفظ نموذجك. ستدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب عليك تسجيل الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم [`Trainer`] بتقييم الدقة وحفظ نقطة فحص التدريب.
2. مرر معلمات التدريب إلى [`Trainer`] جنبًا إلى جنب مع النموذج ومُجمِّع البيانات والمعالج ودالة تجميع البيانات ودالة `compute_metrics`.
3. استدعي [`~Trainer.train`] لضبط نموذجك.
```py
>>> training_args = TrainingArguments(
... output_dir="my_awesome_swag_model",
... eval_strategy="epoch",
... save_strategy="epoch",
... load_best_model_at_end=True,
... learning_rate=5e-5,
... per_device_train_batch_size=16,
... per_device_eval_batch_size=16,
... num_train_epochs=3,
... weight_decay=0.01,
... push_to_hub=True,
... )
>>> trainer = Trainer(
... model=model,
... args=training_args,
... train_dataset=tokenized_swag["train"],
... eval_dataset=tokenized_swag["validation"],
... processing_class=tokenizer,
... data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),
... compute_metrics=compute_metrics,
... )
>>> trainer.train()
```
بمجرد اكتمال التدريب، شارك نموذجك مع Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فراجع الدرس الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن وجدول معدل التعلم وبعض معلمات التدريب:
```py
>>> from transformers import create_optimizer
>>> batch_size = 16
>>> num_train_epochs = 2
>>> total_train_steps = (len(tokenized_swag["train"]) // batch_size) * num_train_epochs
>>> optimizer, schedule = create_optimizer(init_lr=5e-5, num_warmup_steps=0, num_train_steps=total_train_steps)
```
ثم يمكنك تحميل BERT باستخدام [`TFAutoModelForMultipleChoice`]:
```py
>>> from transformers import TFAutoModelForMultipleChoice
>>> model = TFAutoModelForMultipleChoice.from_pretrained("google-bert/bert-base-uncased")
```
حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> data_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_swag["train"],
... shuffle=True,
... batch_size=batch_size,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_swag["validation"],
... shuffle=False,
... batch_size=batch_size,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers تحتوي على دالة خسارة مناسبة للمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
```py
>>> model.compile(optimizer=optimizer) # لا توجد وسيطة خسارة!
```
الخطوتان الأخيرتان قبل بدء التدريب هما: حساب دقة التنبؤات، وتوفير طريقة لرفع النموذج إلى Hub. ويمكن تحقيق ذلك باستخدام [استدعاءات Keras](../main_classes/keras_callbacks)
مرر دالتك `compute_metrics` إلى [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
```
حدد مكان دفع نموذجك ومعالجك في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_model",
... tokenizer=tokenizer,
... )
```
ثم قم بتضمين الاستدعاءات معًا:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
أخيرًا، أنت جاهز لبدء تدريب نموذجك! استدعِ[`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة وعدد الحقب والاستدعاءات لضبط النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=2, callbacks=callbacks)
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
للحصول على مثال أكثر تعمقًا حول كيفية ضبط نموذج للاختيار من متعدد، ألق نظرة على [دفتر ملاحظات PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)
أو [دفتر ملاحظات TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice-tf.ipynb) المقابل.
</Tip>
## الاستدلال (Inference)
رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
قم بإنشاء نص واقتراح إجابتين محتملتين:
```py
>>> prompt = "France has a bread law, Le Décret Pain, with strict rules on what is allowed in a traditional baguette."
>>> candidate1 = "The law does not apply to croissants and brioche."
>>> candidate2 = "The law applies to baguettes."
```
<frameworkcontent>
<pt>
قم بتحليل كل مطالبة وزوج إجابة مرشح وأعد تنسورات PyTorch. يجب عليك أيضًا إنشاء بعض `العلامات`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_swag_model")
>>> inputs = tokenizer([[prompt, candidate1], [prompt, candidate2]], return_tensors="pt", padding=True)
>>> labels = torch.tensor(0).unsqueeze(0)
```
مرر مدخلاتك والعلامات إلى النموذج وأرجع`logits`:
```py
>>> from transformers import AutoModelForMultipleChoice
>>> model = AutoModelForMultipleChoice.from_pretrained("username/my_awesome_swag_model")
>>> outputs = model(**{k: v.unsqueeze(0) for k, v in inputs.items()}, labels=labels)
>>> logits = outputs.logits
```
استخرج الفئة ذات الاحتمالية الأكبر:
```py
>>> predicted_class = logits.argmax().item()
>>> predicted_class
0
```
</pt>
<tf>
قم بتحليل كل مطالبة وزوج إجابة مرشح وأعد موترات TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_swag_model")
>>> inputs = tokenizer([[prompt, candidate1], [prompt, candidate2]], return_tensors="tf", padding=True)
```
مرر مدخلاتك إلى النموذج وأعد القيم logits:
```py
>>> from transformers import TFAutoModelForMultipleChoice
>>> model = TFAutoModelForMultipleChoice.from_pretrained("username/my_awesome_swag_model")
>>> inputs = {k: tf.expand_dims(v, 0) for k, v in inputs.items()}
>>> outputs = model(inputs)
>>> logits = outputs.logits
```
استخرج الفئة ذات الاحتمالية الأكبر:
```py
>>> predicted_class = int(tf.math.argmax(logits, axis=-1)[0])
>>> predicted_class
0
```
</tf>
</frameworkcontent>

View File

@ -1,432 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# الإجابة على الأسئلة (Question answering)
[[open-in-colab]]
<Youtube id="ajPx5LwJD-I"/>
تُقدّم مهام الإجابة على الأسئلة إجابةً بناءً على سؤال. إذا سبق لك أن سألت مساعدًا افتراضيًا مثل Alexa أو Siri أو Google عن حالة الطقس، فأنت قد استخدمت نموذج للإجابة على الأسئلة من قبل. هناك نوعان شائعان لمهام الإجابة على الأسئلة:
- الاستخراجية: استخراج الإجابة من السياق المحدد.
- التلخيصية: إنشاء إجابة من السياق تجيب على السؤال بشكل صحيح.
سيوضح لك هذا الدليل كيفية:
1. ضبط [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) على مجموعة بيانات [SQuAD](https://huggingface.co/datasets/squad) للإجابة على الأسئلة الاستخراجية.
2. استخدام النموذج المضبوط للاستدلال.
<Tip>
لمشاهدة جميع الهياكل والنسخ المتوافقة مع هذه المهمة، نوصي بالرجوع إلى [صفحة المهمة](https://huggingface.co/tasks/question-answering)
</Tip>
قبل البدء، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate
```
نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل نموذجك ومشاركته مع المجتمع. عند المطالبة، أدخل الرمز المميز الخاص بك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات SQuAD
ابدأ بتحميل جزء أصغر من مجموعة بيانات SQuAD من مكتبة 🤗 Datasets. سيتيح لك ذلك فرصة للتجربة والتحقق من عمل كل شيء بشكل صحيح قبل قضاء المزيد من الوقت في التدريب على مجموعة البيانات الكاملة.
```py
>>> from datasets import load_dataset
>>> squad = load_dataset("squad", split="train[:5000]")
```
قم بتقسيم تقسيم `train` لمجموعة البيانات إلى مجموعة تدريب واختبار باستخدام طريقة [`~datasets.Dataset.train_test_split`]:
```py
>>> squad = squad.train_test_split(test_size=0.2)
```
ثم ألق نظرة على مثال:
```py
>>> squad["train"][0]
{'answers': {'answer_start': [515], 'text': ['Saint Bernadette Soubirous']},
'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
'id': '5733be284776f41900661182',
'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
'title': 'University_of_Notre_Dame'
}
```
هناك العديد من الحقول المهمة هنا:
- `answers`: موقع بداية الرمز المميز للإجابة ونص الإجابة.
- `context`: معلومات أساسية يحتاج النموذج إلى استخراج الإجابة منها.
- `question`: السؤال الذي يجب على النموذج الإجابة عليه.
## المعالجة المسبقة (Preprocess)
<Youtube id="qgaM0weJHpA"/>
الخطوة التالية هي تحميل المحلل اللغوى DistilBERT لمعالجة حقلي `question` و `context`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
```
هناك بعض خطوات المعالجة المسبقة الخاصة بمهام الإجابة على الأسئلة التي يجب أن تكون على دراية بها:
1. قد تحتوي بعض الأمثلة في مجموعة البيانات على `context` طويلًا يتجاوز الحد الأقصى لطول مدخل النموذج. للتعامل مع النصوص الأطول، يتم اقتطاع `context` فقط عن طريق تعيين `truncation="only_second"`.
2. بعد ذلك، يتم تحديد مواضع بداية ونهاية الإجابة في `context` الأصلي عن طريق تعيين
`return_offset_mapping=True`.
3. باستخدام التعيين، يمكن الآن تحديد رموز بداية ونهاية الإجابة. استخدم طريقة [`~tokenizers.Encoding.sequence_ids`]
لتحديد أجزاء الإزاحة التي تتوافق مع `question` و `context`.
فيما يلي كيفية إنشاء دالة لقص وتعيين رموز البداية والنهاية لـ `answer` إلى `context`:
```py
>>> def preprocess_function(examples):
... questions = [q.strip() for q in examples["question"]]
... inputs = tokenizer(
... questions,
... examples["context"],
... max_length=384,
... truncation="only_second",
... return_offsets_mapping=True,
... padding="max_length",
... )
... offset_mapping = inputs.pop("offset_mapping")
... answers = examples["answers"]
... start_positions = []
... end_positions = []
... for i, offset in enumerate(offset_mapping):
... answer = answers[i]
... start_char = answer["answer_start"][0]
... end_char = answer["answer_start"][0] + len(answer["text"][0])
... sequence_ids = inputs.sequence_ids(i)
... # Find the start and end of the context
... idx = 0
... while sequence_ids[idx] != 1:
... idx += 1
... context_start = idx
... while sequence_ids[idx] == 1:
... idx += 1
... context_end = idx - 1
... # If the answer is not fully inside the context, label it (0, 0)
... if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
... start_positions.append(0)
... end_positions.append(0)
... else:
... # Otherwise it's the start and end token positions
... idx = context_start
... while idx <= context_end and offset[idx][0] <= start_char:
... idx += 1
... start_positions.append(idx - 1)
... idx = context_end
... while idx >= context_start and offset[idx][1] >= end_char:
... idx -= 1
... end_positions.append(idx + 1)
... inputs["start_positions"] = start_positions
... inputs["end_positions"] = end_positions
... return inputs
```
لتطبيق المعالجة المسبقة على كامل مجموعة البيانات، استخدم [`~datasets.Dataset.map`] من مكتبة 🤗 Datasets. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات دفعة واحدة. قم بإزالة أي أعمدة لا تحتاجها:
```py
>>> tokenized_squad = squad.map(preprocess_function, batched=True, remove_columns=squad["train"].column_names)
```
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DefaultDataCollator`]. بخلاف مجمّعات البيانات الأخرى في 🤗 Transformers، لا يطبق [`DefaultDataCollator`] أي معالجة مسبقة إضافية مثل الحشو.
<frameworkcontent>
<pt>
```py
>>> from transformers import DefaultDataCollator
>>> data_collator = DefaultDataCollator()
```
</pt>
<tf>
```py
>>> from transformers import DefaultDataCollator
>>> data_collator = DefaultDataCollator(return_tensors="tf")
```
</tf>
</frameworkcontent>
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام [`Trainer`], ألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
</Tip>
أنت جاهز لبدء تدريب نموذجك الآن! قم بتحميل DistilBERT باستخدام [`AutoModelForQuestionAnswering`]:
```py
>>> from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer
>>> model = AutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
```
في هذه المرحلة، تبقى ثلاث خطوات فقط:
1. حدد المعاملات الفائقة للتدريب في [`TrainingArguments`]. المعامل الوحيد المطلوب هو `output_dir` الذي يحدد مكان حفظ نموذجك. ستدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب عليك تسجيل الدخول إلى Hugging Face لتحميل نموذجك).
2. مرر معاملات التدريب إلى [`Trainer`] جنبًا إلى جنب مع النموذج، ومجموعة البيانات، والمُحلّل النصي، ومُجمّع البيانات.
3. استدعِ ـ [`~Trainer.train`] لضبط النموذج.
```py
>>> training_args = TrainingArguments(
... output_dir="my_awesome_qa_model",
... eval_strategy="epoch",
... learning_rate=2e-5,
... per_device_train_batch_size=16,
... per_device_eval_batch_size=16,
... num_train_epochs=3,
... weight_decay=0.01,
... push_to_hub=True,
... )
>>> trainer = Trainer(
... model=model,
... args=training_args,
... train_dataset=tokenized_squad["train"],
... eval_dataset=tokenized_squad["test"],
... processing_class=tokenizer,
... data_collator=data_collator,
... )
>>> trainer.train()
```
بمجرد اكتمال التدريب، شارك نموذجك في Hub باستخدام الدالة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن، وجدول معدل التعلم، وبعض المعاملات الفائقة للتدريب:
```py
>>> from transformers import create_optimizer
>>> batch_size = 16
>>> num_epochs = 2
>>> total_train_steps = (len(tokenized_squad["train"]) // batch_size) * num_epochs
>>> optimizer, schedule = create_optimizer(
... init_lr=2e-5,
... num_warmup_steps=0,
... num_train_steps=total_train_steps,
... )
```
ثم يمكنك تحميل DistilBERT باستخدام [`TFAutoModelForQuestionAnswering`]:
```py
>>> from transformers import TFAutoModelForQuestionAnswering
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
```
حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_squad["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_squad["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتكوين النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method):
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer)
```
آخر شيء يجب إعداده قبل بدء التدريب هو توفير طريقة لدفع نموذجك إلى Hub. يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومعالجك المعجمي في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> callback = PushToHubCallback(
... output_dir="my_awesome_qa_model",
... tokenizer=tokenizer,
... )
```
أخيرًا، أنت جاهز لبدء تدريب نموذجك! اتصل بـ [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة، وعدد العهود، ومعاودة الاتصال الخاصة بك لضبط النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=[callback])
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
للحصول على مثال أكثر تعمقًا حول كيفية ضبط نموذج للإجابة على الأسئلة، ألق نظرة على [دفتر ملاحظات PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb) المقابل
أو [دفتر ملاحظات TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering-tf.ipynb).
</Tip>
## التقييم (Evaluate)
يتطلب التقييم للإجابة على الأسئلة قدرًا كبيرًا من المعالجة اللاحقة. لتوفير وقتك، يتخطى هذا الدليل خطوة التقييم. لا يزال [`Trainer`] يحسب خسارة التقييم أثناء التدريب، مما يعني أنك لست تجهل تمامًا أداء نموذجك.
إذا كان لديك المزيد من الوقت وتهتم بكيفية تقييم نموذجك للإجابة على الأسئلة، فألق نظرة على فصل [الإجابة على الأسئلة](https://huggingface.co/course/chapter7/7?fw=pt#post-processing) من دورة 🤗 Hugging Face!
## الاستدلال (Inference)
رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
حدد سؤالًا وسياقًا ليقوم النموذج بالتنبؤ بالإجابة عليه:
```py
>>> question = "How many programming languages does BLOOM support?"
>>> context = "BLOOM has 176 billion parameters and can generate text in 46 languages natural languages and 13 programming languages."
```
أبسط طريقة لتجربة نموذجك المُدرَّب للاستدلال هي استخدامه في [`pipeline`]. قم بإنشاء كائن لـ `pipeline` للإجابة على الأسئلة باستخدام نموذجك، ومرِّر النص إليه:
```py
>>> from transformers import pipeline
>>> question_answerer = pipeline("question-answering", model="my_awesome_qa_model")
>>> question_answerer(question=question, context=context)
{'score': 0.2058267742395401,
'start': 10,
'end': 95,
'answer': '176 مليار معامل ويمكنه إنشاء نصوص بـ 46 لغة طبيعية و 13'}
```
يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
<frameworkcontent>
<pt>
قسّم النص وأرجع تنسورات PyTorch:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
>>> inputs = tokenizer(question, context, return_tensors="pt")
```
مرر مدخلاتك إلى النموذج وأرجع `logits`:
```py
>>> import torch
>>> from transformers import AutoModelForQuestionAnswering
>>> model = AutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
>>> with torch.no_grad():
... outputs = model(**inputs)
```
احصل على أعلى احتمال من مخرجات النموذج لموضعي البداية والنهاية:
```py
>>> answer_start_index = outputs.start_logits.argmax()
>>> answer_end_index = outputs.end_logits.argmax()
```
استخلاص الإجابة من الرموز المتوقعة:
```py
>>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
>>> tokenizer.decode(predict_answer_tokens)
'176 billion parameters and can generate text in 46 languages natural languages and 13'
```
</pt>
<tf>
قم بتحليل النص المعجمي وأعد موترات TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
>>> inputs = tokenizer(question, context, return_tensors="tf")
```
مرر مدخلاتك إلى النموذج وأعد `logits`:
```py
>>> from transformers import TFAutoModelForQuestionAnswering
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
>>> outputs = model(**inputs)
```
احصل على أعلى احتمال من مخرجات النموذج لموضعي البداية والنهاية:
```py
>>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
>>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
```
استخلاص الإجابة من الرموز المتوقعة:
```py
>>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
>>> tokenizer.decode(predict_answer_tokens)
'176 billion parameters and can generate text in 46 languages natural languages and 13'
```
</tf>
</frameworkcontent>

View File

@ -1,387 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# تصنيف النص(Text classification)
[[open-in-colab]]
<Youtube id="leNG9fN9FQU"/>
تصنيف النص هو مهمة NLP شائعة حيث يُعيّن تصنيفًا أو فئة للنص. تستخدم بعض أكبر الشركات تصنيف النصوص في الإنتاج لمجموعة واسعة من التطبيقات العملية. أحد أكثر أشكال تصنيف النص شيوعًا هو تحليل المشاعر، والذي يقوم بتعيين تسمية مثل 🙂 إيجابية، 🙁 سلبية، أو 😐 محايدة لتسلسل نصي.
سيوضح لك هذا الدليل كيفية:
1. ضبط [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) على مجموعة بيانات [IMDb](https://huggingface.co/datasets/imdb) لتحديد ما إذا كانت مراجعة الفيلم إيجابية أو سلبية.
2. استخدام نموذج الضبط الدقيق للتنبؤ.
<Tip>
لرؤية جميع البنى ونقاط التحقق المتوافقة مع هذه المهمة، نوصي بالتحقق من [صفحة المهمة](https://huggingface.co/tasks/text-classification).
</Tip>
قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate accelerate
```
نحن نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عند المطالبة، أدخل رمزك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات IMDb
ابدأ بتحميل مجموعة بيانات IMDb من مكتبة 🤗 Datasets:
```py
>>> from datasets import load_dataset
>>> imdb = load_dataset("imdb")
```
ثم ألق نظرة على مثال:
```py
>>> imdb["test"][0]
{
"label": 0,
"text": "I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \"Gene Roddenberry's Earth...\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again.",
}
```
هناك حقولان في هذه المجموعة من البيانات:
- `text`: نص مراجعة الفيلم.
- `label`: قيمة إما `0` لمراجعة سلبية أو `1` لمراجعة إيجابية.
## المعالجة المسبقة(Preprocess)
الخطوة التالية هي تحميل المُجزِّئ النص DistilBERT لتهيئة لحقل `text`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
```
أنشئ دالة لتهيئة حقل `text` وتقصير السلاسل النصية بحيث لا يتجاوز طولها الحد الأقصى لإدخالات DistilBERT:
```py
>>> def preprocess_function(examples):
... return tokenizer(examples["text"], truncation=True)
```
لتطبيق دالة التهيئة على مجموعة البيانات بأكملها، استخدم دالة 🤗 Datasets [`~datasets.Dataset.map`] . يمكنك تسريع `map` باستخدام `batched=True` لمعالجة دفعات من البيانات:
```py
tokenized_imdb = imdb.map(preprocess_function, batched=True)
```
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorWithPadding`]. الأكثر كفاءة هو استخدام الحشو الديناميكي لجعل الجمل متساوية في الطول داخل كل دفعة، بدلًا من حشو كامل البيانات إلى الحد الأقصى للطول.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorWithPadding
>>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorWithPadding
>>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التقييم(Evaluate)
يُعدّ تضمين مقياس أثناء التدريب مفيدًا لتقييم أداء النموذج. يمكنك تحميل طريقة تقييم بسرعة باستخدام مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index) . بالنسبة لهذه المهمة، قم بتحميل مقياس [الدقة](https://huggingface.co/spaces/evaluate-metric/accuracy) (راجع جولة 🤗 Evaluate [السريعة](https://huggingface.co/docs/evaluate/a_quick_tour) لمعرفة المزيد حول كيفية تحميل وحساب مقياس):
```py
>>> import evaluate
>>> accuracy = evaluate.load("accuracy")
```
ثم أنشئ دالة تقوم بتمرير تنبؤاتك وتصنيفاتك إلى [`~evaluate.EvaluationModule.compute`] لحساب الدقة:
```py
>>> import numpy as np
>>> def compute_metrics(eval_pred):
... predictions, labels = eval_pred
... predictions = np.argmax(predictions, axis=1)
... return accuracy.compute(predictions=predictions, references=labels)
```
دالة `compute_metrics` جاهزة الآن، وستعود إليها عند إعداد التدريب.
## التدريب(Train)
قبل أن تبدأ في تدريب نموذجك، قم بإنشاء خريطة من المعرفات المتوقعة إلى تسمياتها باستخدام `id2label` و `label2id`:
```py
>>> id2label = {0: "NEGATIVE", 1: "POSITIVE"}
>>> label2id = {"NEGATIVE": 0, "POSITIVE": 1}
```
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن على دراية بضبط نموذج دقيق باستخدام [`Trainer`], فالق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
</Tip>
أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilBERT مع [`AutoModelForSequenceClassification`] جنبًا إلى جنب مع عدد التصنيفات المتوقعة، وتصنيفات الخرائط:
```py
>>> from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
>>> model = AutoModelForSequenceClassification.from_pretrained(
... "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
... )
```
في هذه المرحلة، هناك ثلاث خطوات فقط متبقية:
1. حدد مُعامِلات التدريب في [`TrainingArguments`]. المُعامل المطلوب الوحيد هو `output_dir`، لتحديد مكان حفظ النموذج. يمكنك رفع النموذج إلى Hub بتعيين `push_to_hub=True` (يجب تسجيل الدخول إلى Hugging Face لرفع النموذج). سيقوم `Trainer` بتقييم الدقة وحفظ نقاط التحقق في نهاية كل حقبة.
2. مرر مُعامِلات التدريب إلى `Trainer` مع النموذج، ومجموعة البيانات، والمحلل اللغوي، ومُجمِّع البيانات، ووظيفة `compute_metrics`.
3. استدعِ [`~Trainer.train`] لضبط النموذج.
```py
>>> training_args = TrainingArguments(
... output_dir="my_awesome_model",
... learning_rate=2e-5,
... per_device_train_batch_size=16,
... per_device_eval_batch_size=16,
... num_train_epochs=2,
... weight_decay=0.01,
... eval_strategy="epoch",
... save_strategy="epoch",
... load_best_model_at_end=True,
... push_to_hub=True,
... )
>>> trainer = Trainer(
... model=model,
... args=training_args,
... train_dataset=tokenized_imdb["train"],
... eval_dataset=tokenized_imdb["test"],
... processing_class=tokenizer,
... data_collator=data_collator,
... compute_metrics=compute_metrics,
... )
>>> trainer.train()
```
<Tip>
يستخدم [`Trainer`] الحشو الديناميكي افتراضيًا عند تمرير `tokenizer` إليه. في هذه الحالة، لا تحتاج لتحديد مُجمِّع البيانات صراحةً.
</Tip>
بعد اكتمال التدريب، شارك نموذجك على Hub باستخدام الطريقة [`~transformers.Trainer.push_to_hub`] ليستخدمه الجميع:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن على دراية بضبط نموذج باستخدام Keras، قم بالاطلاع على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لضبط نموذج في TensorFlow، ابدأ بإعداد دالة المحسن، وجدول معدل التعلم، وبعض معلمات التدريب:
```py
>>> from transformers import create_optimizer
>>> import tensorflow as tf
>>> batch_size = 16
>>> num_epochs = 5
>>> batches_per_epoch = len(tokenized_imdb["train"]) // batch_size
>>> total_train_steps = int(batches_per_epoch * num_epochs)
>>> optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps)
```
ثم يمكنك تحميل DistilBERT مع [`TFAutoModelForSequenceClassification`] بالإضافة إلى عدد التصنيفات المتوقعة، وتعيينات التسميات:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained(
... "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
... )
```
قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_imdb["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_imdb["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers لديها دالة خسارة ذات صلة بالمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
آخر أمرين يجب إعدادهما قبل بدء التدريب هو حساب الدقة من التوقعات، وتوفير طريقة لدفع نموذجك إلى Hub. يتم ذلك باستخدام [Keras callbacks](../main_classes/keras_callbacks).
قم بتمرير دالة `compute_metrics` الخاصة بك إلى [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
```
حدد مكان دفع نموذجك والمجزئ اللغوي في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_model",
... tokenizer=tokenizer,
... )
```
ثم اجمع الاستدعاءات معًا:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
أخيرًا، أنت مستعد لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق، وعدد الحقبات، واستدعاءاتك لضبط النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
للحصول على مثال أكثر عمقًا حول كيفية ضبط نموذج لتصنيف النصوص، قم بالاطلاع على الدفتر المقابل
[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb)
أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification-tf.ipynb).
</Tip>
## الاستدلال(Inference)
رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
احصل على بعض النصوص التي ترغب في إجراء الاستدلال عليها:
```py
>>> text = "This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end. Might be my favorite of the three."
```
أسهل طريقة لتجربة النموذج المضبوط للاستدلال هي استخدامه ضمن [`pipeline`]. قم بإنشاء `pipeline` لتحليل المشاعر مع نموذجك، ومرر نصك إليه:
```py
>>> from transformers import pipeline
>>> classifier = pipeline("sentiment-analysis", model="stevhliu/my_awesome_model")
>>> classifier(text)
[{'label': 'POSITIVE', 'score': 0.9994940757751465}]
```
يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
<frameworkcontent>
<pt>
قم يتجزئة النص وإرجاع تنسورات PyTorch:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
>>> inputs = tokenizer(text, return_tensors="pt")
```
مرر المدخلات إلى النموذج واسترجع `logits`:
```py
>>> from transformers import AutoModelForSequenceClassification
>>> model = AutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")
>>> with torch.no_grad():
... logits = model(**inputs).logits
```
استخرج الفئة ذات الاحتمالية الأعلى، واستخدم `id2label` لتحويلها إلى تصنيف نصي:
```py
>>> predicted_class_id = logits.argmax().item()
>>> model.config.id2label[predicted_class_id]
'POSITIVE'
```
</pt>
<tf>
قم بتحليل النص وإرجاع تنسيقات TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
>>> inputs = tokenizer(text, return_tensors="tf")
```
قم بتمرير مدخلاتك إلى النموذج وإرجاع `logits`:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")
>>> logits = model(**inputs).logits
```
استخرج الفئة ذات الاحتمالية الأعلى، واستخدم `id2label` لتحويلها إلى تصنيف نصي:
```py
>>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
>>> model.config.id2label[predicted_class_id]
'POSITIVE'
```
</tf>
</frameworkcontent>

File diff suppressed because one or more lines are too long

View File

@ -1,550 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# تصنيف الرموز(Token classification)
[[open-in-colab]]
<Youtube id="wVHdVlPScxA"/>
يهدف تصنيف الرموز إلى إعطاء تسمية لكل رمز على حدة في الجملة. من أكثر مهام تصنيف الرموز شيوعًا هو التعرف على الكيانات المسماة (NER). يحاول NER تحديد تسمية لكل كيان في الجملة، مثل شخص، أو مكان، أو منظمة.
سيوضح لك هذا الدليل كيفية:
1. ضبط [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) على مجموعة بيانات [WNUT 17](https://huggingface.co/datasets/wnut_17) للكشف عن كيانات جديدة.
2. استخدام نموذجك المضبوط بدقة للاستدلال.
<Tip>
للاطلاع جميع البنى والنقاط المتوافقة مع هذه المهمة، نوصي بالرجوع من [صفحة المهمة](https://huggingface.co/tasks/token-classification).
</Tip>
قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate seqeval
```
نحن نشجعك على تسجيل الدخول إلى حساب HuggingFace الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عندما يُطلب منك، أدخل رمزك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات WNUT 17
ابدأ بتحميل مجموعة بيانات WNUT 17 من مكتبة 🤗 Datasets:
```py
>>> from datasets import load_dataset
>>> wnut = load_dataset("wnut_17")
```
ثم ألق نظرة على مثال:
```py
>>> wnut["train"][0]
{'id': '0',
'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0],
'tokens': ['@paulwalk', 'It', "'s", 'the', 'view', 'from', 'where', 'I', "'m", 'living', 'for', 'two', 'weeks', '.', 'Empire', 'State', 'Building', '=', 'ESB', '.', 'Pretty', 'bad', 'storm', 'here', 'last', 'evening', '.']
}
```
يمثل كل رقم في `ner_tags` كياناً. حوّل الأرقام إلى أسماء التصنيفات لمعرفة ماهية الكيانات:
```py
>>> label_list = wnut["train"].features[f"ner_tags"].feature.names
>>> label_list
[
"O",
"B-corporation",
"I-corporation",
"B-creative-work",
"I-creative-work",
"B-group",
"I-group",
"B-location",
"I-location",
"B-person",
"I-person",
"B-product",
"I-product",
]
```
يشير الحرف الذي يسبق كل `ner_tag` إلى موضع الرمز للكيان:
- `B-` يشير إلى بداية الكيان.
- `I-` يشير إلى أن الرمز يقع ضمن نفس الكيان (على سبيل المثال، الرمز `State` هو جزء من كيان مثل `Empire State Building`).
- `0` يشير إلى أن الرمز لا يمثل أي كيان.
## المعالجة المسبقة(Preprocess)
<Youtube id="iY2AZYdZAr0"/>
الخطوة التالية هي تحميل مُجزِّئ النصوص DistilBERT للمعالجة المسبقة لحقل `tokens`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
```
كما رأيت في حقل `tokens` المثال أعلاه، يبدو أن المدخل قد تم تحليله بالفعل. لكن المدخل لم يُجزأ بعد ويتعيّن عليك ضبط `is_split_into_words=True` لتقسيم الكلمات إلى كلمات فرعية. على سبيل المثال:
```py
>>> example = wnut["train"][0]
>>> tokenized_input = tokenizer(example["tokens"], is_split_into_words=True)
>>> tokens = tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"])
>>> tokens
['[CLS]', '@', 'paul', '##walk', 'it', "'", 's', 'the', 'view', 'from', 'where', 'i', "'", 'm', 'living', 'for', 'two', 'weeks', '.', 'empire', 'state', 'building', '=', 'es', '##b', '.', 'pretty', 'bad', 'storm', 'here', 'last', 'evening', '.', '[SEP]']
```
ومع ذلك، يضيف هذا بعض الرموز الخاصة `[CLS]` و`[SEP]` وتقسيم الكلمات إلى أجزاء يُنشئ عدم تطابق بين المُدخلات والتسميات. قد يتم تقسيم كلمة واحدة تقابل تسمية واحدة الآن إلى كلمتين فرعيتين. ستحتاج إلى إعادة محاذاة الرموز والتسميات عن طريق:
1. ربط كل رمز بالكلمة الأصلية باستخدام الخاصية [`word_ids`](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.BatchEncoding.word_ids).
2. تعيين التسمية `-100` للرموز الخاصة `[CLS]` و`[SEP]` بحيث يتم تجاهلها بواسطة دالة الخسارة PyTorch (انظر [CrossEntropyLoss](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html)).
3. تسمية الرمز الأول فقط لكلمة معينة. قم بتعيين `-100` لأجزاء الكلمة الأخرى.
هنا كيف يمكنك إنشاء وظيفة لإعادة محاذاة الرموز والتسميات، وقص الجمل لتتجاوز الحد الأقصى لطول مُدخلات DistilBERT:
```py
>>> def tokenize_and_align_labels(examples):
... tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
... labels = []
... for i, label in enumerate(examples[f"ner_tags"]):
... word_ids = tokenized_inputs.word_ids(batch_index=i) # تعيين الرموز إلى كلماتهم المقابلة.
... previous_word_idx = None
... label_ids = []
... for word_idx in word_ids: # تعيين الرموز الخاصة إلى -100.
... if word_idx is None:
... label_ids.append(-100)
... elif word_idx != previous_word_idx: # تسمية الرمز الأول فقط لكلمة معينة.
... label_ids.append(label[word_idx])
... else:
... label_ids.append(-100)
... previous_word_idx = word_idx
... labels.append(label_ids)
... tokenized_inputs["labels"] = labels
... return tokenized_inputs
```
لتطبيق هذه العملية على كامل مجموعة البيانات، استخدم الدالة [`~datasets.Dataset.map`] لمجموعة بيانات 🤗. يمكنك تسريع الدالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:
```py
>>> tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)
```
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorWithPadding`].من الأفضل استخدام *الحشو الديناميكي* للجمل إلى أطول طول في دفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بالكامل إلى الطول الأقصى.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorForTokenClassification
>>> data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorForTokenClassification
>>> data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التقييم(Evaluate)
يُعدّ تضمين مقياس أثناء التدريب مفيدًا في تقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة مع مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index). لهذه المهمة، قم بتحميل إطار [seqeval](https://huggingface.co/spaces/evaluate-metric/seqeval) (انظر جولة 🤗 Evaluate [quick tour](https://huggingface.co/docs/evaluate/a_quick_tour) لمعرفة المزيد حول كيفية تحميل وحساب مقياس). يُخرج seqeval عدة نتائج: الدقة، والاستذكار، ومقياس F1، والدقة.
```py
>>> import evaluate
>>> seqeval = evaluate.load("seqeval")
```
احصل على تسميات الكيانات المسماة (NER) أولاً،ثم أنشئ دالة تُمرر تنبؤاتك وتسمياتك الصحيحة إلى [`~evaluate.EvaluationModule.compute`] لحساب النتائج:
```py
>>> import numpy as np
>>> labels = [label_list[i] for i in example[f"ner_tags"]]
>>> def compute_metrics(p):
... predictions, labels = p
... predictions = np.argmax(predictions, axis=2)
... true_predictions = [
... [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
... for prediction, label in zip(predictions, labels)
... ]
... true_labels = [
... [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
... for prediction, label in zip(predictions, labels)
... ]
... results = seqeval.compute(predictions=true_predictions, references=true_labels)
... return {
... "precision": results["overall_precision"],
... "recall": results["overall_recall"],
... "f1": results["overall_f1"],
... "accuracy": results["overall_accuracy"],
... }
```
دالة `compute_metrics` جاهزة للاستخدام، وستحتاج إليها عند إعداد التدريب.
## التدريب(Train)
قبل تدريب النموذج، جهّز خريطة تربط بين المعرّفات المتوقعة وتسمياتها باستخدام `id2label` و `label2id`:
```py
>>> id2label = {
... 0: "O",
... 1: "B-corporation",
... 2: "I-corporation",
... 3: "B-creative-work",
... 4: "I-creative-work",
... 5: "B-group",
... 6: "I-group",
... 7: "B-location",
... 8: "I-location",
... 9: "B-person",
... 10: "I-person",
... 11: "B-product",
... 12: "I-product",
... }
>>> label2id = {
... "O": 0,
... "B-corporation": 1,
... "I-corporation": 2,
... "B-creative-work": 3,
... "I-creative-work": 4,
... "B-group": 5,
... "I-group": 6,
... "B-location": 7,
... "I-location": 8,
... "B-person": 9,
... "I-person": 10,
... "B-product": 11,
... "I-product": 12,
... }
```
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن على دراية بتعديل نموذج باستخدام [`Trainer`], ألق نظرة على الدليل التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
</Tip>
أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilBERT مع [`AutoModelForTokenClassification`] إلى جانب عدد التصنيفات المتوقعة، وخريطة التسميات:
```py
>>> from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
>>> model = AutoModelForTokenClassification.from_pretrained(
... "distilbert/distilbert-base-uncased", num_labels=13, id2label=id2label, label2id=label2id
... )
```
في هذه المرحلة، هناك ثلاث خطوات فقط متبقية:
1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعامل الوحيد المطلوب هو `output_dir` الذي يحدد مكان حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم [`Trainer`] بتقييم درجات seqeval وحفظ تسخة التدريب.
2. قم بتمرير معاملات التدريب إلى [`Trainer`] إلى جانب النموذج، ومجموعة البيانات، والمُجزِّئ اللغوي، و`data collator`، ودالة `compute_metrics`.
3.استدعِ [`~Trainer.train`] لتدريب نموذجك.
```py
>>> training_args = TrainingArguments(
... output_dir="my_awesome_wnut_model",
... learning_rate=2e-5,
... per_device_train_batch_size=16,
... per_device_eval_batch_size=16,
... num_train_epochs=2,
... weight_decay=0.01,
... eval_strategy="epoch",
... save_strategy="epoch",
... load_best_model_at_end=True,
... push_to_hub=True,
... )
>>> trainer = Trainer(
... model=model,
... args=training_args,
... train_dataset=tokenized_wnut["train"],
... eval_dataset=tokenized_wnut["test"],
... processing_class=tokenizer,
... data_collator=data_collator,
... compute_metrics=compute_metrics,
... )
>>> trainer.train()
```
بمجرد اكتمال التدريب، شارك نموذجك على Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن على دراية بتعديل نموذج باستخدام Keras، ألق نظرة على الدليل التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
للتعديل على نموذج في TensorFlow، ابدأ بإعداد دالة محسن، وجدول معدل التعلم، وبعض معلمات التدريب:
```py
>>> from transformers import create_optimizer
>>> batch_size = 16
>>> num_train_epochs = 3
>>> num_train_steps = (len(tokenized_wnut["train"]) // batch_size) * num_train_epochs
>>> optimizer, lr_schedule = create_optimizer(
... init_lr=2e-5,
... num_train_steps=num_train_steps,
... weight_decay_rate=0.01,
... num_warmup_steps=0,
... )
```
ثم يمكنك تحميل DistilBERT مع [`TFAutoModelForTokenClassification`] إلى جانب عدد التسميات المتوقعة، وتخطيطات التسميات:
```py
>>> from transformers import TFAutoModelForTokenClassification
>>> model = TFAutoModelForTokenClassification.from_pretrained(
... "distilbert/distilbert-base-uncased", num_labels=13, id2label=id2label, label2id=label2id
... )
```
قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` مع [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_wnut["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_wnut["validation"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
هيّئ النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن نماذج Transformers تتضمن دالة خسارة افتراضية مرتبطة بالمهمة، لذلك لا تحتاج إلى تحديد واحدة إلا إذا كنت ترغب في ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
آخر أمرين يجب إعدادهما قبل بدء التدريب هو حساب درجات seqeval من التنبؤات، وتوفير طريقة لدفع نموذجك إلى Hub. يتم ذلك باستخدام [Keras callbacks](../main_classes/keras_callbacks).
مرر دالة `compute_metrics` الخاصة بك إلى [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
```
حدد مكان دفع نموذجك والمحلل اللغوي في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_wnut_model",
... tokenizer=tokenizer,
... )
```
ثم جمّع callbacks الخاصة بك معًا:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
أخيرًا، أنت جاهز الآن لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع بيانات التدريب والتحقق، وعدد الحقبات، وcallbacks لتعديل النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
للحصول على مثال أكثر تفصيلاً حول كيفية تعديل نموذج لتصنيف الرموز، ألق نظرة على الدفتر المقابل
[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)
أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification-tf.ipynb).
</Tip>
## الاستدلال(Inference)
رائع، الآن بعد أن قمت بتعديل نموذج، يمكنك استخدامه للاستدلال!
احصل على بعض النصوص التي تريد تشغيل الاستدلال عليها:
```py
>>> text = "The Golden State Warriors are an American professional basketball team based in San Francisco."
```
أبسط طريقة لتجربة نموذجك المُدرب مسبقًا للاستدلال هي استخدامه في [`pipeline`]. قم بتنفيذ `pipeline` لتصنيف الكيانات المسماة مع نموذجك، ومرر نصك إليه:
```py
>>> from transformers import pipeline
>>> classifier = pipeline("ner", model="stevhliu/my_awesome_wnut_model")
>>> classifier(text)
[{'entity': 'B-location',
'score': 0.42658573,
'index': 2,
'word': 'golden',
'start': 4,
'end': 10},
{'entity': 'I-location',
'score': 0.35856336,
'index': 3,
'word': 'state',
'start': 11,
'end': 16},
{'entity': 'B-group',
'score': 0.3064001,
'index': 4,
'word': 'warriors',
'start': 17,
'end': 25},
{'entity': 'B-location',
'score': 0.65523505,
'index': 13,
'word': 'san',
'start': 80,
'end': 83},
{'entity': 'B-location',
'score': 0.4668663,
'index': 14,
'word': 'francisco',
'start': 84,
'end': 93}]
```
يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
<frameworkcontent>
<pt>
قسّم النص إلى رموز وأرجع المُوتّرات بلغة PyTorch:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_wnut_model")
>>> inputs = tokenizer(text, return_tensors="pt")
```
مرر مدخلاتك إلى النموذج واحصل على `logits`:
```py
>>> from transformers import AutoModelForTokenClassification
>>> model = AutoModelForTokenClassification.from_pretrained("stevhliu/my_awesome_wnut_model")
>>> with torch.no_grad():
... logits = model(**inputs).logits
```
استخرج الفئة ذات الاحتمالية الأعلى، واستخدم جدول `id2label` الخاصة بالنموذج لتحويلها إلى تسمية نصية:
```py
>>> predictions = torch.argmax(logits, dim=2)
>>> predicted_token_class = [model.config.id2label[t.item()] for t in predictions[0]]
>>> predicted_token_class
['O',
'O',
'B-location',
'I-location',
'B-group',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'B-location',
'B-location',
'O',
'O']
```
</pt>
<tf>
قسّم النص إلى رموز وأرجع المُوتّرات ب TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_wnut_model")
>>> inputs = tokenizer(text, return_tensors="tf")
```
مرر مدخلاتك إلى النموذج واحصل على `logits`:
```py
>>> from transformers import TFAutoModelForTokenClassification
>>> model = TFAutoModelForTokenClassification.from_pretrained("stevhliu/my_awesome_wnut_model")
>>> logits = model(**inputs).logits
```
استخرج الفئة ذات الاحتمالية الأعلى، واستخدم جدول `id2label` الخاصة بالنموذج لتحويلها إلى تسمية نصية:
```py
>>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1)
>>> predicted_token_class = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()]
>>> predicted_token_class
['O',
'O',
'B-location',
'I-location',
'B-group',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'B-location',
'B-location',
'O',
'O']
```
</tf>
</frameworkcontent>

View File

@ -1,407 +0,0 @@
<!--Copyright 2022 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# الترجمة(Translation)
[[open-in-colab]]
<Youtube id="1JvfrvZgi6c"/>
الترجمة هي عملية تحويل سلسلة نصية من لغة إلى أخرى. وهي إحدى المهام التي يمكن صياغتها كمسألة تسلسل إلى تسلسل، وهو إطار عمل قوي لإنتاج مخرجات من مدخلات، مثل الترجمة أو التلخيص. تُستخدم أنظمة الترجمة عادةً للترجمة بين نصوص لغات مختلفة، ويمكن استخدامها أيضًا لترجمة الكلام أو لمهام تجمع بين النصوص والكلام، مثل تحويل النص إلى كلام أو تحويل الكلام إلى نص.
سيوضح لك هذا الدليل كيفية:
1. ضبط دقيق لنموذج [T5](https://huggingface.co/google-t5/t5-small) على المجموعة الفرعية الإنجليزية-الفرنسية من مجموعة بيانات [OPUS Books](https://huggingface.co/datasets/opus_books) لترجمة النص الإنجليزي إلى الفرنسية.
2. استخدام النموذج المضبوط بدقة للاستدلال.
<Tip>
لمشاهدة جميع البنى والنسخ المتوافقة مع هذه المهمة، نوصي بالتحقق من [صفحة المهمة](https://huggingface.co/tasks/translation).
</Tip>
قبل البدء، تأكد من تثبيت جميع المكتبات الضرورية:
```bash
pip install transformers datasets evaluate sacrebleu
```
نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل نموذجك ومشاركته مع المجتمع. عند الطلب، أدخل الرمز المميز الخاص بك لتسجيل الدخول:
```py
>>> from huggingface_hub import notebook_login
>>> notebook_login()
```
## تحميل مجموعة بيانات OPUS Books
ابدأ بتحميل المجموعة الفرعية الإنجليزية-الفرنسية من مجموعة بيانات [OPUS Books](https://huggingface.co/datasets/opus_books) من مكتبة 🤗 Datasets:
```py
>>> from datasets import load_dataset
>>> books = load_dataset("opus_books", "en-fr")
```
قسّم مجموعة البيانات إلى مجموعة تدريب ومجموعة اختبار باستخدام طريقة [`~datasets.Dataset.train_test_split`]:
```py
>>> books = books["train"].train_test_split(test_size=0.2)
```
ثم ألقِ نظرة على مثال:
```py
>>> books["train"][0]
{'id': '90560',
'translation': {'en': 'But this lofty plateau measured only a few fathoms, and soon we reentered Our Element.',
'fr': 'Mais ce plateau élevé ne mesurait que quelques toises, et bientôt nous fûmes rentrés dans notre élément.'}}
```
`translation`: ترجمة إنجليزية وفرنسية للنص.
## المعالجة المسبقة(Preprocess)
<Youtube id="XAR8jnZZuUs"/>
الخطوة التالية هي تحميل مُجزئ T5 لمعالجة أزواج اللغة الإنجليزية-الفرنسية:
```py
>>> from transformers import AutoTokenizer
>>> checkpoint = "google-t5/t5-small"
>>> tokenizer = AutoTokenizer.from_pretrained(checkpoint)
```
يجب أن تقوم دالة المعالجة المسبقة التي تُريد إنشاءها بما يلي:
1. إضافة بادئة إلى المُدخل بمُوجه حتى يعرف T5 أن هذه مهمة ترجمة. تتطلب بعض النماذج القادرة على أداء مهام متعددة توجيهًا لمهام مُحددة.
2. تعيين اللغة الهدف (الفرنسية) في معامل `text_target` لضمان معالجة المُجزئ للنص بشكل صحيح. إذا لم تُعيّن `text_target`، فسيُعالج المُجزئ النص على أنه إنجليزي.
3. اقتطاع التسلسلات بحيث لا يزيد طولها عن الحد الأقصى الذي يحدده معامل `max_length`.
```py
>>> source_lang = "en"
>>> target_lang = "fr"
>>> prefix = "translate English to French: "
>>> def preprocess_function(examples):
... inputs = [prefix + example[source_lang] for example in examples["translation"]]
... targets = [example[target_lang] for example in examples["translation"]]
... model_inputs = tokenizer(inputs, text_target=targets, max_length=128, truncation=True)
... return model_inputs
```
لتطبيق دالة المعالجة المسبقة على مجموعة البيانات بأكملها، استخدم طريقة [`~datasets.Dataset.map`] من 🤗 Datasets. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:
```py
>>> tokenized_books = books.map(preprocess_function, batched=True)
```
الآن أنشئ دفعة من الأمثلة باستخدام [`DataCollatorForSeq2Seq`]. من الأكثر كفاءة *الحشو الديناميكي* للجمل إلى أطول طول في دفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الحد الأقصى للطول.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorForSeq2Seq
>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorForSeq2Seq
>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التقييم (Evaluate)
غالباً ما يكون تضمين مقياس أثناء التدريب مفيداً لتقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة باستخدام مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index). لهذه المهمة، حمّل مقياس [SacreBLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu) (راجع [الجولة السريعة](https://huggingface.co/docs/evaluate/a_quick_tour) لـ 🤗 Evaluate لمعرفة المزيد حول كيفية تحميل وحساب مقياس):
```py
>>> import evaluate
>>> metric = evaluate.load("sacrebleu")
```
ثم أنشئ دالة تُمرر تنبؤاتك وتسمياتك إلى [`~evaluate.EvaluationModule.compute`] لحساب درجة SacreBLEU:
```py
>>> import numpy as np
>>> def postprocess_text(preds, labels):
... preds = [pred.strip() for pred in preds]
... labels = [[label.strip()] for label in labels]
... return preds, labels
>>> def compute_metrics(eval_preds):
... preds, labels = eval_preds
... if isinstance(preds, tuple):
... preds = preds[0]
... decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
... labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
... decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
... decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
... result = metric.compute(predictions=decoded_preds, references=decoded_labels)
... result = {"bleu": result["score"]}
... prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
... result["gen_len"] = np.mean(prediction_lens)
... result = {k: round(v, 4) for k, v in result.items()}
... return result
```
دالة `compute_metrics` الخاصة بك جاهزة الآن، وسوف تعود إليها عند إعداد التدريب.
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن معتادًا على ضبط دقيق نموذج باستخدام [`Trainer`], فألقِ نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
</Tip>
أنت جاهز لبدء تدريب نموذجك الآن! حمّل T5 باستخدام [`AutoModelForSeq2SeqLM`]:
```py
>>> from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
>>> model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
```
في هذه المرحلة، تبقى ثلاث خطوات فقط:
1. حدد مُعاملات للتدريب في [`Seq2SeqTrainingArguments`]. المُعامل الوحيدة المطلوبة هي `output_dir` التي تحدد مكان حفظ النموذج الخاص بك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب عليك تسجيل الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم [`Trainer`] بتقييم مقياس SacreBLEU وحفظ نقطة تدقيق التدريب.
2. مرر مُعاملات التدريب إلى [`Seq2SeqTrainer`] جنبًا إلى جنب مع النموذج ومجموعة البيانات والمعالج اللغوي وجامع البيانات ووظيفة `compute_metrics`.
3. نفّذ [`~Trainer.train`] لضبط نموذجك.
```py
>>> training_args = Seq2SeqTrainingArguments(
... output_dir="my_awesome_opus_books_model",
... eval_strategy="epoch",
... learning_rate=2e-5,
... per_device_train_batch_size=16,
... per_device_eval_batch_size=16,
... weight_decay=0.01,
... save_total_limit=3,
... num_train_epochs=2,
... predict_with_generate=True,
... fp16=True, #change to bf16=True for XPU
... push_to_hub=True,
... )
>>> trainer = Seq2SeqTrainer(
... model=model,
... args=training_args,
... train_dataset=tokenized_books["train"],
... eval_dataset=tokenized_books["test"],
... processing_class=tokenizer,
... data_collator=data_collator,
... compute_metrics=compute_metrics,
... )
>>> trainer.train()
```
بمجرد اكتمال التدريب، شارك نموذجك مع Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن وجدول معدل تعلم وبعض المعلمات الفائقة للتدريب:
```py
>>> from transformers import AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
ثم يمكنك تحميل T5 باستخدام [`TFAutoModelForSeq2SeqLM`]:
```py
>>> from transformers import TFAutoModelForSeq2SeqLM
>>> model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
```
حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_books["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... tokenized_books["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتكوين النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers تحتوي على دالة خسارة ذات صلة بالمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة إلا إذا كنت ترغب في ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
آخر شيئين يجب إعدادهما قبل بدء التدريب هما حساب مقياس SacreBLEU من التوقعات، وتوفير طريقة لدفع نموذجك إلى Hub. يتم كلاهما باستخدام [استدعاءات Keras](../main_classes/keras_callbacks).
مرر دالة `compute_metrics` الخاصة بك إلى [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_test_set)
```
حدد مكان دفع نموذجك ومعالجك اللغوي في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_opus_books_model",
... tokenizer=tokenizer,
... )
```
ثم اجمع استدعاءاتك معًا:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
أخيرًا، أنت جاهز لبدء تدريب نموذجك! اتصل بـ [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة وعدد الحقب واستدعاءاتك لضبط النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=callbacks)
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
للحصول على مثال أكثر تعمقًا لكيفية ضبط نموذج للترجمة، ألق نظرة على [دفتر ملاحظات PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation.ipynb) المقابل
أو [دفتر ملاحظات TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation-tf.ipynb).
</Tip>
## الاستدلال (Inference)
رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
أحضر بعض النصوص التي ترغب في ترجمتها إلى لغة أخرى. بالنسبة لـ T5، تحتاج إلى إضافة بادئة إلى مدخلاتك اعتمادًا على المهمة التي تعمل عليها. للترجمة من الإنجليزية إلى الفرنسية، يجب عليك إضافة بادئة إلى مدخلاتك كما هو موضح أدناه:
```py
>>> text = "translate English to French: Legumes share resources with nitrogen-fixing bacteria."
```
أبسط طريقة لتجربة نموذجك المضبوط للاستدلال هي استخدامه في [`pipeline`]. قم بإنشاء مثيل لـ `pipeline` للترجمة باستخدام نموذجك، ومرر النص الخاص بك إليه:
```py
>>> from transformers import pipeline
# تغيير `xx` إلى لغة الإدخال و `yy` إلى لغة المخرجات المطلوبة.
# أمثلة: "en" للغة الإنجليزية، "fr" للغة الفرنسية، "de" للغة الألمانية، "es" للغة الإسبانية، "zh" للغة الصينية، إلخ؛ translation_en_to_fr تترجم من الإنجليزية إلى الفرنسية
# يمكنك عرض جميع قوائم اللغات هنا - https://huggingface.co/languages
>>> translator = pipeline("translation_xx_to_yy", model="username/my_awesome_opus_books_model")
>>> translator(text)
[{'translation_text': 'Legumes partagent des ressources avec des bactéries azotantes.'}]
```
يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
<frameworkcontent>
<pt>
قم بتحويل النص إلى رموز وإرجاع `input_ids` كموترات PyTorch:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_opus_books_model")
>>> inputs = tokenizer(text, return_tensors="pt").input_ids
```
استخدم الدالة [`~generation.GenerationMixin.generate`] لإنشاء الترجمة. لمزيد من التفاصيل حول استراتيجيات توليد النصوص المختلفة والمعلمات للتحكم في التوليد، تحقق من واجهة برمجة تطبيقات [توليد النصوص](../main_classes/text_generation).
```py
>>> from transformers import AutoModelForSeq2SeqLM
>>> model = AutoModelForSeq2SeqLM.from_pretrained("username/my_awesome_opus_books_model")
>>> outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
```
فك تشفير معرفات الرموز المولدة مرة أخرى إلى نص:
```py
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
'Les lignées partagent des ressources avec des bactéries enfixant l'azote.'
```
</pt>
<tf>
قم بتحويل النص إلى رموز وإرجاع `input_ids` كموترات TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_opus_books_model")
>>> inputs = tokenizer(text, return_tensors="tf").input_ids
```
استخدم طريقة [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] لإنشاء الترجمة. لمزيد من التفاصيل حول استراتيجيات توليد النصوص المختلفة والمعلمات للتحكم في التوليد، تحقق من واجهة برمجة تطبيقات [توليد النصوص](../main_classes/text_generation).
```py
>>> from transformers import TFAutoModelForSeq2SeqLM
>>> model = TFAutoModelForSeq2SeqLM.from_pretrained("username/my_awesome_opus_books_model")
>>> outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
```
فك تشفير معرفات الرموز المولدة مرة أخرى إلى نص:
```py
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
'Les lugumes partagent les ressources avec des bactéries fixatrices d'azote.'
```
</tf>
</frameworkcontent>

View File

@ -1,41 +0,0 @@
# Tiktoken والتفاعل مع Transformers
يتم دمج دعم ملفات نموذج tiktoken بسلاسة في 🤗 transformers عند تحميل النماذج
`from_pretrained` مع ملف `tokenizer.model` tiktoken على Hub، والذي يتم تحويله تلقائيًا إلى [المحلل اللغوي السريع](https://huggingface.co/docs/transformers/main/en/main_classes/tokenizer#transformers.PreTrainedTokenizerFast).
### النماذج المعروفة التي تم إصدارها مع `tiktoken.model`:
- gpt2
- llama3
## مثال على الاستخدام
من أجل تحميل ملفات `tiktoken` في `transformers`، تأكد من أن ملف `tokenizer.model` هو ملف tiktoken وسيتم تحميله تلقائيًا عند التحميل `from_pretrained`. إليك كيفية تحميل مجزىء لغوي ونموذج، والذي
يمكن تحميله من نفس الملف بالضبط:
```py
from transformers import AutoTokenizer
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="original")
```
## إنشاء مجزىء لغوي tiktoken
لا يحتوي ملف `tokenizer.model` على أي معلومات حول الرموز أو الأنماط الإضافية. إذا كانت هذه الأمور مهمة، قم بتحويل المحلل اللغوي إلى `tokenizer.json`، وهو التنسيق المناسب لـ [`PreTrainedTokenizerFast`].
قم بتوليد ملف `tokenizer.model` باستخدام [tiktoken.get_encoding](https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/tiktoken/registry.py#L63) ثم قم بتحويله إلى `tokenizer.json` باستخدام [`convert_tiktoken_to_fast`].
```py
from transformers.integrations.tiktoken import convert_tiktoken_to_fast
from tiktoken import get_encoding
# يمكنك تحميل ترميزك المخصص أو الترميز الذي توفره OpenAI
encoding = get_encoding("gpt2")
convert_tiktoken_to_fast(encoding, "config/save/dir")
```
يتم حفظ ملف `tokenizer.json` الناتج في الدليل المحدد ويمكن تحميله باستخدام [`PreTrainedTokenizerFast`].
```py
tokenizer = PreTrainedTokenizerFast.from_pretrained("config/save/dir")
```

View File

@ -109,7 +109,7 @@ label: NEGATIVE, with score: 0.5309
Die [`pipeline`] kann auch über einen ganzen Datensatz iterieren. Starten wir mit der Installation der [🤗 Datasets](https://huggingface.co/docs/datasets/) Bibliothek:
```bash
pip install datasets
pip install datasets
```
Erstellen wir eine [`pipeline`] mit der Aufgabe die wir lösen und dem Modell welches wir nutzen möchten.
@ -191,7 +191,7 @@ Wenn Sie kein Modell für Ihren Anwendungsfall finden können, müssen Sie ein v
<Youtube id="AhChOFRegn4"/>
Unter der Haube arbeiten die Klassen [`AutoModelForSequenceClassification`] und [`AutoTokenizer`] zusammen, um die [`pipeline`] zu betreiben. Eine [`AutoClass`](./model_doc/auto) ist eine Abkürzung, die automatisch die Architektur eines trainierten Modells aus dessen Namen oder Pfad abruft. Sie müssen nur die passende `AutoClass` für Ihre Aufgabe und den zugehörigen Tokenizer mit [`AutoTokenizer`] auswählen.
Unter der Haube arbeiten die Klassen [`AutoModelForSequenceClassification`] und [`AutoTokenizer`] zusammen, um die [`pipeline`] zu betreiben. Eine [`AutoClass`](./model_doc/auto) ist eine Abkürzung, die automatisch die Architektur eines trainierten Modells aus dessen Namen oder Pfad abruft. Sie müssen nur die passende `AutoClass` für Ihre Aufgabe und den zugehörigen Tokenizer mit [`AutoTokenizer`] auswählen.
Kehren wir zu unserem Beispiel zurück und sehen wir uns an, wie Sie die `AutoClass` verwenden können, um die Ergebnisse der [`pipeline`] zu replizieren.
@ -281,7 +281,7 @@ Jetzt können Sie Ihren vorverarbeiteten Stapel von Eingaben direkt an das Model
```
Das Modell gibt die endgültigen Aktivierungen in dem Attribut "logits" aus. Wenden Sie die Softmax-Funktion auf die "logits" an, um die Wahrscheinlichkeiten zu erhalten:
```py
>>> from torch import nn
@ -308,7 +308,7 @@ In der [Aufgabenzusammenfassung](./task_summary) steht, welche [AutoModel]-Klass
</Tip>
Jetzt können Sie Ihren vorverarbeiteten Stapel von Eingaben direkt an das Modell übergeben, indem Sie die Wörterbuchschlüssel direkt an die Tensoren übergeben:
```py
>>> tf_outputs = tf_model(tf_batch)
```
@ -383,8 +383,8 @@ Ein besonders cooles 🤗 Transformers-Feature ist die Möglichkeit, ein Modell
```py
>>> from transformers import AutoModel
>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
>>> pt_model = AutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
>>> pt_model = AutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
```
</pt>
<tf>
@ -392,8 +392,8 @@ Ein besonders cooles 🤗 Transformers-Feature ist die Möglichkeit, ein Modell
```py
>>> from transformers import TFAutoModel
>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
```
</tf>
</frameworkcontent>

View File

@ -110,17 +110,6 @@
- local: kv_cache
title: Best Practices for Generation with Cache
title: Generation
- isExpanded: false
sections:
- local: chat_template_basics
title: Getting Started with Chat Templates for Text LLMs
- local: chat_template_multimodal
title: Multimodal Chat Templates for Vision and Audio LLMs
- local: chat_template_tools_and_documents
title: Expanding Chat Templates with Tools and Documents
- local: chat_template_advanced
title: Advanced Usage and Customizing Your Chat Templates
title: Chat Templates
- isExpanded: false
sections:
- local: tasks/idefics
@ -138,6 +127,8 @@
title: Use model-specific APIs
- local: custom_models
title: Share a custom model
- local: chat_templating
title: Chat templates
- local: trainer
title: Trainer
- local: sagemaker
@ -148,6 +139,8 @@
title: Export to TFLite
- local: torchscript
title: Export to TorchScript
- local: benchmarks
title: Benchmarks
- local: notebooks
title: Notebooks with examples
- local: community
@ -174,16 +167,10 @@
title: AWQ
- local: quantization/aqlm
title: AQLM
- local: quantization/vptq
title: SpQR
- local: quantization/spqr
title: VPTQ
- local: quantization/quanto
title: Quanto
- local: quantization/eetq
title: EETQ
- local: quantization/higgs
title: HIGGS
- local: quantization/hqq
title: HQQ
- local: quantization/fbgemm_fp8
@ -196,8 +183,6 @@
title: BitNet
- local: quantization/compressed_tensors
title: compressed-tensors
- local: quantization/finegrained_fp8
title: Fine-grained FP8
- local: quantization/contribute
title: Contribute new quantization method
title: Quantization Methods
@ -337,8 +322,6 @@
sections:
- local: model_doc/albert
title: ALBERT
- local: model_doc/bamba
title: Bamba
- local: model_doc/bart
title: BART
- local: model_doc/barthez
@ -379,8 +362,6 @@
title: CodeLlama
- local: model_doc/cohere
title: Cohere
- local: model_doc/cohere2
title: Cohere2
- local: model_doc/convbert
title: ConvBERT
- local: model_doc/cpm
@ -397,8 +378,6 @@
title: DeBERTa-v2
- local: model_doc/dialogpt
title: DialoGPT
- local: model_doc/diffllama
title: DiffLlama
- local: model_doc/distilbert
title: DistilBERT
- local: model_doc/dpr
@ -415,10 +394,10 @@
title: ESM
- local: model_doc/falcon
title: Falcon
- local: model_doc/falcon3
title: Falcon3
- local: model_doc/falcon_mamba
title: FalconMamba
- local: model_doc/fastspeech2_conformer
title: FastSpeech2Conformer
- local: model_doc/flan-t5
title: FLAN-T5
- local: model_doc/flan-ul2
@ -461,12 +440,6 @@
title: Granite
- local: model_doc/granitemoe
title: GraniteMoe
- local: model_doc/granitemoeshared
title: GraniteMoeShared
- local: model_doc/granitevision
title: GraniteVision
- local: model_doc/helium
title: Helium
- local: model_doc/herbert
title: HerBERT
- local: model_doc/ibert
@ -519,8 +492,6 @@
title: mLUKE
- local: model_doc/mobilebert
title: MobileBERT
- local: model_doc/modernbert
title: ModernBert
- local: model_doc/mpnet
title: MPNet
- local: model_doc/mpt
@ -641,8 +612,6 @@
title: YOSO
- local: model_doc/zamba
title: Zamba
- local: model_doc/zamba2
title: Zamba2
title: Text models
- isExpanded: false
sections:
@ -658,8 +627,6 @@
title: ConvNeXTV2
- local: model_doc/cvt
title: CvT
- local: model_doc/dab-detr
title: DAB-DETR
- local: model_doc/deformable_detr
title: Deformable DETR
- local: model_doc/deit
@ -668,8 +635,6 @@
title: Depth Anything
- local: model_doc/depth_anything_v2
title: Depth Anything V2
- local: model_doc/depth_pro
title: DepthPro
- local: model_doc/deta
title: DETA
- local: model_doc/detr
@ -678,8 +643,6 @@
title: DiNAT
- local: model_doc/dinov2
title: DINOV2
- local: model_doc/dinov2_with_registers
title: DINOv2 with Registers
- local: model_doc/dit
title: DiT
- local: model_doc/dpt
@ -726,14 +689,10 @@
title: ResNet
- local: model_doc/rt_detr
title: RT-DETR
- local: model_doc/rt_detr_v2
title: RT-DETRv2
- local: model_doc/segformer
title: SegFormer
- local: model_doc/seggpt
title: SegGpt
- local: model_doc/superglue
title: SuperGlue
- local: model_doc/superpoint
title: SuperPoint
- local: model_doc/swiftformer
@ -746,10 +705,6 @@
title: Swin2SR
- local: model_doc/table-transformer
title: Table Transformer
- local: model_doc/textnet
title: TextNet
- local: model_doc/timm_wrapper
title: Timm Wrapper
- local: model_doc/upernet
title: UperNet
- local: model_doc/van
@ -766,8 +721,6 @@
title: ViTMatte
- local: model_doc/vit_msn
title: ViTMSN
- local: model_doc/vitpose
title: ViTPose
- local: model_doc/yolos
title: YOLOS
- local: model_doc/zoedepth
@ -785,8 +738,8 @@
title: dac
- local: model_doc/encodec
title: EnCodec
- local: model_doc/fastspeech2_conformer
title: FastSpeech2Conformer
- local: model_doc/hiera
title: Hiera
- local: model_doc/hubert
title: Hubert
- local: model_doc/mctct
@ -795,8 +748,6 @@
title: Mimi
- local: model_doc/mms
title: MMS
- local: model_doc/moonshine
title: Moonshine
- local: model_doc/moshi
title: Moshi
- local: model_doc/musicgen
@ -859,8 +810,6 @@
title: ALIGN
- local: model_doc/altclip
title: AltCLIP
- local: model_doc/aria
title: Aria
- local: model_doc/blip
title: BLIP
- local: model_doc/blip-2
@ -879,22 +828,16 @@
title: CLIPSeg
- local: model_doc/clvp
title: CLVP
- local: model_doc/colpali
title: ColPali
- local: model_doc/data2vec
title: Data2Vec
- local: model_doc/deplot
title: DePlot
- local: model_doc/donut
title: Donut
- local: model_doc/emu3
title: Emu3
- local: model_doc/flava
title: FLAVA
- local: model_doc/git
title: GIT
- local: model_doc/got_ocr2
title: GOT-OCR2
- local: model_doc/grounding-dino
title: Grounding DINO
- local: model_doc/groupvit
@ -955,8 +898,6 @@
title: Pix2Struct
- local: model_doc/pixtral
title: Pixtral
- local: model_doc/qwen2_5_vl
title: Qwen2.5-VL
- local: model_doc/qwen2_audio
title: Qwen2Audio
- local: model_doc/qwen2_vl

View File

@ -184,7 +184,7 @@ class PairClassificationPipeline(Pipeline):
```
The implementation is framework agnostic, and will work for PyTorch and TensorFlow models. If we have saved this in
a file named `pair_classification.py`, we can then import it and register it like this.
a file named `pair_classification.py`, we can then import it and register it like this:
```py
from pair_classification import PairClassificationPipeline
@ -199,22 +199,6 @@ PIPELINE_REGISTRY.register_pipeline(
)
```
The [register_pipeline](https://github.com/huggingface/transformers/blob/9feae5fb0164e89d4998e5776897c16f7330d3df/src/transformers/pipelines/base.py#L1387) function registers the pipeline details (task type, pipeline class, supported backends) to a models `config.json` file.
```json
"custom_pipelines": {
"pair-classification": {
"impl": "pair_classification.PairClassificationPipeline",
"pt": [
"AutoModelForSequenceClassification"
],
"tf": [
"TFAutoModelForSequenceClassification"
],
}
},
```
Once this is done, we can use it with a pretrained model. For instance `sgugger/finetuned-bert-mrpc` has been
fine-tuned on the MRPC dataset, which classifies pairs of sentences as paraphrases or not.

View File

@ -162,7 +162,7 @@ agent.run(
improved_prompt could be "A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background"
Now that I have improved the prompt, I can use the image generator tool to generate an image based on this prompt.
=== Agent is executing the code below:
>>> Agent is executing the code below:
image = image_generator(prompt="A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background")
final_answer(image)
```

View File

@ -0,0 +1,387 @@
<!--Copyright 2020 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Benchmarks
<Tip warning={true}>
Hugging Face's Benchmarking tools are deprecated and it is advised to use external Benchmarking libraries to measure the speed
and memory complexity of Transformer models.
</Tip>
[[open-in-colab]]
Let's take a look at how 🤗 Transformers models can be benchmarked, best practices, and already available benchmarks.
A notebook explaining in more detail how to benchmark 🤗 Transformers models can be found [here](https://github.com/huggingface/notebooks/tree/main/examples/benchmark.ipynb).
## How to benchmark 🤗 Transformers models
The classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] allow to flexibly benchmark 🤗 Transformers models. The benchmark classes allow us to measure the _peak memory usage_ and _required time_ for both _inference_ and _training_.
<Tip>
Here, _inference_ is defined by a single forward pass, and _training_ is defined by a single forward pass and
backward pass.
</Tip>
The benchmark classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] expect an object of type [`PyTorchBenchmarkArguments`] and
[`TensorFlowBenchmarkArguments`], respectively, for instantiation. [`PyTorchBenchmarkArguments`] and [`TensorFlowBenchmarkArguments`] are data classes and contain all relevant configurations for their corresponding benchmark class. In the following example, it is shown how a BERT model of type _bert-base-cased_ can be benchmarked.
<frameworkcontent>
<pt>
```py
>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
>>> args = PyTorchBenchmarkArguments(models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512])
>>> benchmark = PyTorchBenchmark(args)
```
</pt>
<tf>
```py
>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments
>>> args = TensorFlowBenchmarkArguments(
... models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> benchmark = TensorFlowBenchmark(args)
```
</tf>
</frameworkcontent>
Here, three arguments are given to the benchmark argument data classes, namely `models`, `batch_sizes`, and
`sequence_lengths`. The argument `models` is required and expects a `list` of model identifiers from the
[model hub](https://huggingface.co/models) The `list` arguments `batch_sizes` and `sequence_lengths` define
the size of the `input_ids` on which the model is benchmarked. There are many more parameters that can be configured
via the benchmark argument data classes. For more detail on these one can either directly consult the files
`src/transformers/benchmark/benchmark_args_utils.py`, `src/transformers/benchmark/benchmark_args.py` (for PyTorch)
and `src/transformers/benchmark/benchmark_args_tf.py` (for Tensorflow). Alternatively, running the following shell
commands from root will print out a descriptive list of all configurable parameters for PyTorch and Tensorflow
respectively.
<frameworkcontent>
<pt>
```bash
python examples/pytorch/benchmarking/run_benchmark.py --help
```
An instantiated benchmark object can then simply be run by calling `benchmark.run()`.
```py
>>> results = benchmark.run()
>>> print(results)
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 0.006
google-bert/bert-base-uncased 8 32 0.006
google-bert/bert-base-uncased 8 128 0.018
google-bert/bert-base-uncased 8 512 0.088
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 1227
google-bert/bert-base-uncased 8 32 1281
google-bert/bert-base-uncased 8 128 1307
google-bert/bert-base-uncased 8 512 1539
--------------------------------------------------------------------------------
==================== ENVIRONMENT INFORMATION ====================
- transformers_version: 2.11.0
- framework: PyTorch
- use_torchscript: False
- framework_version: 1.4.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 08:58:43.371351
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</pt>
<tf>
```bash
python examples/tensorflow/benchmarking/run_benchmark_tf.py --help
```
An instantiated benchmark object can then simply be run by calling `benchmark.run()`.
```py
>>> results = benchmark.run()
>>> print(results)
>>> results = benchmark.run()
>>> print(results)
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 0.005
google-bert/bert-base-uncased 8 32 0.008
google-bert/bert-base-uncased 8 128 0.022
google-bert/bert-base-uncased 8 512 0.105
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
--------------------------------------------------------------------------------
google-bert/bert-base-uncased 8 8 1330
google-bert/bert-base-uncased 8 32 1330
google-bert/bert-base-uncased 8 128 1330
google-bert/bert-base-uncased 8 512 1770
--------------------------------------------------------------------------------
==================== ENVIRONMENT INFORMATION ====================
- transformers_version: 2.11.0
- framework: Tensorflow
- use_xla: False
- framework_version: 2.2.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 09:26:35.617317
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</tf>
</frameworkcontent>
By default, the _time_ and the _required memory_ for _inference_ are benchmarked. In the example output above the first
two sections show the result corresponding to _inference time_ and _inference memory_. In addition, all relevant
information about the computing environment, _e.g._ the GPU type, the system, the library versions, etc... are printed
out in the third section under _ENVIRONMENT INFORMATION_. This information can optionally be saved in a _.csv_ file
when adding the argument `save_to_csv=True` to [`PyTorchBenchmarkArguments`] and
[`TensorFlowBenchmarkArguments`] respectively. In this case, every section is saved in a separate
_.csv_ file. The path to each _.csv_ file can optionally be defined via the argument data classes.
Instead of benchmarking pre-trained models via their model identifier, _e.g._ `google-bert/bert-base-uncased`, the user can
alternatively benchmark an arbitrary configuration of any available model class. In this case, a `list` of
configurations must be inserted with the benchmark args as follows.
<frameworkcontent>
<pt>
```py
>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments, BertConfig
>>> args = PyTorchBenchmarkArguments(
... models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> config_base = BertConfig()
>>> config_384_hid = BertConfig(hidden_size=384)
>>> config_6_lay = BertConfig(num_hidden_layers=6)
>>> benchmark = PyTorchBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
>>> benchmark.run()
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
bert-base 8 128 0.006
bert-base 8 512 0.006
bert-base 8 128 0.018
bert-base 8 512 0.088
bert-384-hid 8 8 0.006
bert-384-hid 8 32 0.006
bert-384-hid 8 128 0.011
bert-384-hid 8 512 0.054
bert-6-lay 8 8 0.003
bert-6-lay 8 32 0.004
bert-6-lay 8 128 0.009
bert-6-lay 8 512 0.044
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
--------------------------------------------------------------------------------
bert-base 8 8 1277
bert-base 8 32 1281
bert-base 8 128 1307
bert-base 8 512 1539
bert-384-hid 8 8 1005
bert-384-hid 8 32 1027
bert-384-hid 8 128 1035
bert-384-hid 8 512 1255
bert-6-lay 8 8 1097
bert-6-lay 8 32 1101
bert-6-lay 8 128 1127
bert-6-lay 8 512 1359
--------------------------------------------------------------------------------
==================== ENVIRONMENT INFORMATION ====================
- transformers_version: 2.11.0
- framework: PyTorch
- use_torchscript: False
- framework_version: 1.4.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 09:35:25.143267
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</pt>
<tf>
```py
>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig
>>> args = TensorFlowBenchmarkArguments(
... models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
... )
>>> config_base = BertConfig()
>>> config_384_hid = BertConfig(hidden_size=384)
>>> config_6_lay = BertConfig(num_hidden_layers=6)
>>> benchmark = TensorFlowBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
>>> benchmark.run()
==================== INFERENCE - SPEED - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Time in s
--------------------------------------------------------------------------------
bert-base 8 8 0.005
bert-base 8 32 0.008
bert-base 8 128 0.022
bert-base 8 512 0.106
bert-384-hid 8 8 0.005
bert-384-hid 8 32 0.007
bert-384-hid 8 128 0.018
bert-384-hid 8 512 0.064
bert-6-lay 8 8 0.002
bert-6-lay 8 32 0.003
bert-6-lay 8 128 0.0011
bert-6-lay 8 512 0.074
--------------------------------------------------------------------------------
==================== INFERENCE - MEMORY - RESULT ====================
--------------------------------------------------------------------------------
Model Name Batch Size Seq Length Memory in MB
--------------------------------------------------------------------------------
bert-base 8 8 1330
bert-base 8 32 1330
bert-base 8 128 1330
bert-base 8 512 1770
bert-384-hid 8 8 1330
bert-384-hid 8 32 1330
bert-384-hid 8 128 1330
bert-384-hid 8 512 1540
bert-6-lay 8 8 1330
bert-6-lay 8 32 1330
bert-6-lay 8 128 1330
bert-6-lay 8 512 1540
--------------------------------------------------------------------------------
==================== ENVIRONMENT INFORMATION ====================
- transformers_version: 2.11.0
- framework: Tensorflow
- use_xla: False
- framework_version: 2.2.0
- python_version: 3.6.10
- system: Linux
- cpu: x86_64
- architecture: 64bit
- date: 2020-06-29
- time: 09:38:15.487125
- fp16: False
- use_multiprocessing: True
- only_pretrain_model: False
- cpu_ram_mb: 32088
- use_gpu: True
- num_gpus: 1
- gpu: TITAN RTX
- gpu_ram_mb: 24217
- gpu_power_watts: 280.0
- gpu_performance_state: 2
- use_tpu: False
```
</tf>
</frameworkcontent>
Again, _inference time_ and _required memory_ for _inference_ are measured, but this time for customized configurations
of the `BertModel` class. This feature can especially be helpful when deciding for which configuration the model
should be trained.
## Benchmark best practices
This section lists a couple of best practices one should be aware of when benchmarking a model.
- Currently, only single device benchmarking is supported. When benchmarking on GPU, it is recommended that the user
specifies on which device the code should be run by setting the `CUDA_VISIBLE_DEVICES` environment variable in the
shell, _e.g._ `export CUDA_VISIBLE_DEVICES=0` before running the code.
- The option `no_multi_processing` should only be set to `True` for testing and debugging. To ensure accurate
memory measurement it is recommended to run each memory benchmark in a separate process by making sure
`no_multi_processing` is set to `True`.
- One should always state the environment information when sharing the results of a model benchmark. Results can vary
heavily between different GPU devices, library versions, etc., as a consequence, benchmark results on their own are not very
useful for the community.
## Sharing your benchmark
Previously all available core models (10 at the time) have been benchmarked for _inference time_, across many different
settings: using PyTorch, with and without TorchScript, using TensorFlow, with and without XLA. All of those tests were
done across CPUs (except for TensorFlow XLA) and GPUs.
The approach is detailed in the [following blogpost](https://medium.com/huggingface/benchmarking-transformers-pytorch-and-tensorflow-e2917fb891c2) and the results are
available [here](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing).
With the new _benchmark_ tools, it is easier than ever to share your benchmark results with the community
- [PyTorch Benchmarking Results](https://github.com/huggingface/transformers/tree/main/examples/pytorch/benchmarking/README.md).
- [TensorFlow Benchmarking Results](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/benchmarking/README.md).

View File

@ -1,463 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Advanced Usage and Customizing Your Chat Templates
In this page, well explore more advanced techniques for working with chat templates in Transformers. Whether youre looking to write your own templates, create custom components, or optimize your templates for efficiency, well cover everything you need to take your templates to the next level. Lets dive into the tools and strategies that will help you get the most out of your chat models.
## How do chat templates work?
The chat template for a model is stored on the `tokenizer.chat_template` attribute. Let's take a look at a `Zephyr` chat template, though note this
one is a little simplified from the actual one!
```
{%- for message in messages %}
{{- '<|' + message['role'] + '|>\n' }}
{{- message['content'] + eos_token }}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|assistant|>\n' }}
{%- endif %}
```
If you've never seen one of these before, this is a [Jinja template](https://jinja.palletsprojects.com/en/3.1.x/templates/).
Jinja is a templating language that allows you to write simple code that generates text. In many ways, the code and
syntax resembles Python. In pure Python, this template would look something like this:
```python
for message in messages:
print(f'<|{message["role"]}|>')
print(message['content'] + eos_token)
if add_generation_prompt:
print('<|assistant|>')
```
Effectively, the template does three things:
1. For each message, print the role enclosed in `<|` and `|>`, like `<|user|>` or `<|assistant|>`.
2. Next, print the content of the message, followed by the end-of-sequence token.
3. Finally, if `add_generation_prompt` is set, print the assistant token, so that the model knows to start generating
an assistant response.
This is a pretty simple template but Jinja gives you a lot of flexibility to do more complex things! Let's see a Jinja
template that can format inputs similarly to the way LLaMA formats them (note that the real LLaMA template includes
handling for default system messages and slightly different system message handling in general - don't use this one
in your actual code!)
```
{%- for message in messages %}
{%- if message['role'] == 'user' %}
{{- bos_token + '[INST] ' + message['content'] + ' [/INST]' }}
{%- elif message['role'] == 'system' %}
{{- '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}
{%- elif message['role'] == 'assistant' %}
{{- ' ' + message['content'] + ' ' + eos_token }}
{%- endif %}
{%- endfor %}
```
Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens like
`[INST]` and `[/INST]` based on the role of each message. User, assistant and system messages are clearly
distinguishable to the model because of the tokens they're wrapped in.
## How do I create a chat template?
Simple, just write a jinja template and set `tokenizer.chat_template`. You may find it easier to start with an
existing template from another model and simply edit it for your needs! For example, we could take the LLaMA template
above and add "[ASST]" and "[/ASST]" to assistant messages:
```
{%- for message in messages %}
{%- if message['role'] == 'user' %}
{{- bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}
{%- elif message['role'] == 'system' %}
{{- '<<SYS>>\\n' + message['content'].strip() + '\\n<</SYS>>\\n\\n' }}
{%- elif message['role'] == 'assistant' %}
{{- '[ASST] ' + message['content'] + ' [/ASST]' + eos_token }}
{%- endif %}
{%- endfor %}
```
Now, simply set the `tokenizer.chat_template` attribute. Next time you use [`~PreTrainedTokenizer.apply_chat_template`], it will
use your new template! This attribute will be saved in the `tokenizer_config.json` file, so you can use
[`~utils.PushToHubMixin.push_to_hub`] to upload your new template to the Hub and make sure everyone's using the right
template for your model!
```python
template = tokenizer.chat_template
template = template.replace("SYS", "SYSTEM") # Change the system token
tokenizer.chat_template = template # Set the new template
tokenizer.push_to_hub("model_name") # Upload your new template to the Hub!
```
The method [`~PreTrainedTokenizer.apply_chat_template`] which uses your chat template is called by the [`TextGenerationPipeline`] class, so
once you set the correct chat template, your model will automatically become compatible with [`TextGenerationPipeline`].
<Tip>
If you're fine-tuning a model for chat, in addition to setting a chat template, you should probably add any new chat
control tokens as special tokens in the tokenizer. Special tokens are never split,
ensuring that your control tokens are always handled as single tokens rather than being tokenized in pieces. You
should also set the tokenizer's `eos_token` attribute to the token that marks the end of assistant generations in your
template. This will ensure that text generation tools can correctly figure out when to stop generating text.
</Tip>
## Why do some models have multiple templates?
Some models use different templates for different use cases. For example, they might use one template for normal chat
and another for tool-use, or retrieval-augmented generation. In these cases, `tokenizer.chat_template` is a dictionary.
This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use
Jinja statements like `if tools is defined` and `{% macro %}` definitions to easily wrap multiple code paths in a
single template.
When a tokenizer has multiple templates, `tokenizer.chat_template` will be a `dict`, where each key is the name
of a template. The `apply_chat_template` method has special handling for certain template names: Specifically, it will
look for a template named `default` in most cases, and will raise an error if it can't find one. However, if a template
named `tool_use` exists when the user has passed a `tools` argument, it will use that instead. To access templates
with other names, pass the name of the template you want to the `chat_template` argument of
`apply_chat_template()`.
We find that this can be a bit confusing for users, though - so if you're writing a template yourself, we recommend
trying to put it all in a single template where possible!
## What template should I use?
When setting the template for a model that's already been trained for chat, you should ensure that the template
exactly matches the message formatting that the model saw during training, or else you will probably experience
performance degradation. This is true even if you're training the model further - you will probably get the best
performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the
best performance for inference or fine-tuning when you precisely match the tokenization used during training.
If you're training a model from scratch, or fine-tuning a base language model for chat, on the other hand,
you have a lot of freedom to choose an appropriate template! LLMs are smart enough to learn to handle lots of different
input formats. One popular choice is the `ChatML` format, and this is a good, flexible choice for many use-cases.
It looks like this:
```
{%- for message in messages %}
{{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}
{%- endfor %}
```
If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes
handy support for [generation prompts](#what-are-generation-prompts), but note that it doesn't add BOS or EOS tokens!
If your model expects those, they won't be added automatically by `apply_chat_template` - in other words, the
text will be tokenized with `add_special_tokens=False`. This is to avoid potential conflicts between the template and
the `add_special_tokens` logic. If your model expects special tokens, make sure to add them to the template!
```python
tokenizer.chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
```
This template wraps each message in `<|im_start|>` and `<|im_end|>` tokens, and simply writes the role as a string, which
allows for flexibility in the roles you train with. The output looks like this:
```text
<|im_start|>system
You are a helpful chatbot that will do its best not to say anything so stupid that people tweet about it.<|im_end|>
<|im_start|>user
How are you?<|im_end|>
<|im_start|>assistant
I'm doing great!<|im_end|>
```
The "user", "system" and "assistant" roles are the standard for chat, and we recommend using them when it makes sense,
particularly if you want your model to operate well with [`TextGenerationPipeline`]. However, you are not limited
to these roles - templating is extremely flexible, and any string can be a role.
## I want to add some chat templates! How should I get started?
If you have any chat models, you should set their `tokenizer.chat_template` attribute and test it using
[`~PreTrainedTokenizer.apply_chat_template`], then push the updated tokenizer to the Hub. This applies even if you're
not the model owner - if you're using a model with an empty chat template, or one that's still using the default class
template, please open a [pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) to the model repository so that this attribute can be set properly!
Once the attribute is set, that's it, you're done! `tokenizer.apply_chat_template` will now work correctly for that
model, which means it is also automatically supported in places like `TextGenerationPipeline`!
By ensuring that models have this attribute, we can make sure that the whole community gets to use the full power of
open-source models. Formatting mismatches have been haunting the field and silently harming performance for too long -
it's time to put an end to them!
<Tip>
The easiest way to get started with writing Jinja templates is to take a look at some existing ones. You can use
`print(tokenizer.chat_template)` for any chat model to see what template it's using. In general, models that support tool use have
much more complex templates than other models - so when you're just getting started, they're probably a bad example
to learn from! You can also take a look at the
[Jinja documentation](https://jinja.palletsprojects.com/en/3.1.x/templates/#synopsis) for details
of general Jinja formatting and syntax.
</Tip>
Jinja templates in `transformers` are identical to Jinja templates elsewhere. The main thing to know is that
the conversation history will be accessible inside your template as a variable called `messages`.
You will be able to access `messages` in your template just like you can in Python, which means you can loop over
it with `{% for message in messages %}` or access individual messages with `{{ messages[0] }}`, for example.
You can also use the following tips to write clean, efficient Jinja templates:
### Trimming whitespace
By default, Jinja will print any whitespace that comes before or after a block. This can be a problem for chat
templates, which generally want to be very precise with whitespace! To avoid this, we strongly recommend writing
your templates like this:
```
{%- for message in messages %}
{{- message['role'] + message['content'] }}
{%- endfor %}
```
rather than like this:
```
{% for message in messages %}
{{ message['role'] + message['content'] }}
{% endfor %}
```
Adding `-` will strip any whitespace that comes before the block. The second example looks innocent, but the newline
and indentation may end up being included in the output, which is probably not what you want!
### Special variables
Inside your template, you will have access several special variables. The most important of these is `messages`,
which contains the chat history as a list of message dicts. However, there are several others. Not every
variable will be used in every template. The most common other variables are:
- `tools` contains a list of tools in JSON schema format. Will be `None` or undefined if no tools are passed.
- `documents` contains a list of documents in the format `{"title": "Title", "contents": "Contents"}`, used for retrieval-augmented generation. Will be `None` or undefined if no documents are passed.
- `add_generation_prompt` is a bool that is `True` if the user has requested a generation prompt, and `False` otherwise. If this is set, your template should add the header for an assistant message to the end of the conversation. If your model doesn't have a specific header for assistant messages, you can ignore this flag.
- **Special tokens** like `bos_token` and `eos_token`. These are extracted from `tokenizer.special_tokens_map`. The exact tokens available inside each template will differ depending on the parent tokenizer.
<Tip>
You can actually pass any `kwarg` to `apply_chat_template`, and it will be accessible inside the template as a variable. In general,
we recommend trying to stick to the core variables above, as it will make your model harder to use if users have
to write custom code to pass model-specific `kwargs`. However, we're aware that this field moves quickly, so if you
have a new use-case that doesn't fit in the core API, feel free to use a new `kwarg` for it! If a new `kwarg`
becomes common we may promote it into the core API and create a standard, documented format for it.
</Tip>
### Callable functions
There is also a short list of callable functions available to you inside your templates. These are:
- `raise_exception(msg)`: Raises a `TemplateException`. This is useful for debugging, and for telling users when they're
doing something that your template doesn't support.
- `strftime_now(format_str)`: Equivalent to `datetime.now().strftime(format_str)` in Python. This is used for getting
the current date/time in a specific format, which is sometimes included in system messages.
### Compatibility with non-Python Jinja
There are multiple implementations of Jinja in various languages. They generally have the same syntax,
but a key difference is that when you're writing a template in Python you can use Python methods, such as
`.lower()` on strings or `.items()` on dicts. This will break if someone tries to use your template on a non-Python
implementation of Jinja. Non-Python implementations are particularly common in deployment environments, where JS
and Rust are very popular.
Don't panic, though! There are a few easy changes you can make to your templates to ensure they're compatible across
all implementations of Jinja:
- Replace Python methods with Jinja filters. These usually have the same name, for example `string.lower()` becomes
`string|lower`, and `dict.items()` becomes `dict|items`. One notable change is that `string.strip()` becomes `string|trim`.
See the [list of built-in filters](https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters)
in the Jinja documentation for more.
- Replace `True`, `False` and `None`, which are Python-specific, with `true`, `false` and `none`.
- Directly rendering a dict or list may give different results in other implementations (for example, string entries
might change from single-quoted to double-quoted). Adding the `tojson` filter can help to ensure consistency here.
### Writing generation prompts
We mentioned above that `add_generation_prompt` is a special variable that will be accessible inside your template,
and is controlled by the user setting the `add_generation_prompt` flag. If your model expects a header for
assistant messages, then your template must support adding the header when `add_generation_prompt` is set.
Here is an example of a template that formats messages ChatML-style, with generation prompt support:
```text
{{- bos_token }}
{%- for message in messages %}
{{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
```
The exact content of the assistant header will depend on your specific model, but it should always be **the string
that represents the start of an assistant message**, so that if the user applies your template with
`add_generation_prompt=True` and then generates text, the model will write an assistant response. Also note that some
models do not need a generation prompt, because assistant messages always begin immediately after user messages.
This is particularly common for LLaMA and Mistral models, where assistant messages begin immediately after the `[/INST]`
token that ends user messages. In these cases, the template can ignore the `add_generation_prompt` flag.
Generation prompts are important! If your model requires a generation prompt but it is not set in the template, then
model generations will likely be severely degraded, or the model may display unusual behaviour like continuing
the final user message!
### Writing and debugging larger templates
When this feature was introduced, most templates were quite small, the Jinja equivalent of a "one-liner" script.
However, with new models and features like tool-use and RAG, some templates can be 100 lines long or more. When
writing templates like these, it's a good idea to write them in a separate file, using a text editor. You can easily
extract a chat template to a file:
```python
open("template.jinja", "w").write(tokenizer.chat_template)
```
Or load the edited template back into the tokenizer:
```python
tokenizer.chat_template = open("template.jinja").read()
```
As an added bonus, when you write a long, multi-line template in a separate file, line numbers in that file will
exactly correspond to line numbers in template parsing or execution errors. This will make it much easier to
identify the source of issues.
## Writing templates for tools
Although chat templates do not enforce a specific API for tools (or for anything, really), we recommend
template authors try to stick to a standard API where possible. The whole point of chat templates is to allow code
to be transferable across models, so deviating from the standard tools API means users will have to write
custom code to use tools with your model. Sometimes it's unavoidable, but often with clever templating you can
make the standard API work!
Below, we'll list the elements of the standard API, and give tips on writing templates that will work well with it.
### Tool definitions
Your template should expect that the variable `tools` will either be null (if no tools are passed), or is a list
of JSON schema dicts. Our chat template methods allow users to pass tools as either JSON schema or Python functions, but when
functions are passed, we automatically generate JSON schema and pass that to your template. As a result, the
`tools` variable that your template receives will always be a list of JSON schema. Here is
a sample tool JSON schema:
```json
{
"type": "function",
"function": {
"name": "multiply",
"description": "A function that multiplies two numbers",
"parameters": {
"type": "object",
"properties": {
"a": {
"type": "number",
"description": "The first number to multiply"
},
"b": {
"type": "number",
"description": "The second number to multiply"
}
},
"required": ["a", "b"]
}
}
}
```
And here is some example code for handling tools in your chat template. Remember, this is just an example for a
specific format - your model will probably need different formatting!
```text
{%- if tools %}
{%- for tool in tools %}
{{- '<tool>' + tool['function']['name'] + '\n' }}
{%- for argument in tool['function']['parameters']['properties'] %}
{{- argument + ': ' + tool['function']['parameters']['properties'][argument]['description'] + '\n' }}
{%- endfor %}
{{- '\n</tool>' }}
{%- endif %}
{%- endif %}
```
The specific tokens and tool descriptions your template renders should of course be chosen to match the ones your model
was trained with. There is no requirement that your **model** understands JSON schema input, only that your template can translate
JSON schema into your model's format. For example, [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024)
was trained with tools defined using Python function headers, but the Command-R tool template accepts JSON schema,
converts types internally and renders the input tools as Python headers. You can do a lot with templates!
### Tool calls
Tool calls, if present, will be a list attached to a message with the "assistant" role. Note that `tool_calls` is
always a list, even though most tool-calling models only support single tool calls at a time, which means
the list will usually only have a single element. Here is a sample message dict containing a tool call:
```json
{
"role": "assistant",
"tool_calls": [
{
"type": "function",
"function": {
"name": "multiply",
"arguments": {
"a": 5,
"b": 6
}
}
}
]
}
```
And a common pattern for handling them would be something like this:
```text
{%- if message['role'] == 'assistant' and 'tool_calls' in message %}
{%- for tool_call in message['tool_calls'] %}
{{- '<tool_call>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments']|tojson + '\n</tool_call>' }}
{%- endif %}
{%- endfor %}
{%- endif %}
```
Again, you should render the tool call with the formatting and special tokens that your model expects.
### Tool responses
Tool responses have a simple format: They are a message dict with the "tool" role, a "name" key giving the name
of the called function, and a "content" key containing the result of the tool call. Here is a sample tool response:
```json
{
"role": "tool",
"name": "multiply",
"content": "30"
}
```
You don't need to use all of the keys in the tool response. For example, if your model doesn't expect the function
name to be included in the tool response, then rendering it can be as simple as:
```text
{%- if message['role'] == 'tool' %}
{{- "<tool_result>" + message['content'] + "</tool_result>" }}
{%- endif %}
```
Again, remember that the actual formatting and special tokens are model-specific - you should take a lot of care
to ensure that tokens, whitespace and everything else exactly match the format your model was trained with!

View File

@ -1,287 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Getting Started with Chat Templates for Text LLMs
An increasingly common use case for LLMs is **chat**. In a chat context, rather than continuing a single string
of text (as is the case with a standard language model), the model instead continues a conversation that consists
of one or more **messages**, each of which includes a **role**, like "user" or "assistant", as well as message text.
Much like tokenization, different models expect very different input formats for chat. This is the reason we added
**chat templates** as a feature. Chat templates are part of the tokenizer for text-only LLMs or processor for multimodal LLMs. They specify how to convert conversations, represented as lists of messages, into a single tokenizable string in the format that the model expects.
We'll explore the basic usage of chat templates with text-only LLMs in this page. For detailed guidance on multimodal models, we have a dedicated [documentation oage for multimodal models](./chat_template_multimodal), which covers how to work with image, video and audio inputs in your templates.
Let's make this concrete with a quick example using the `mistralai/Mistral-7B-Instruct-v0.1` model:
```python
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
>>> chat = [
... {"role": "user", "content": "Hello, how are you?"},
... {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
... {"role": "user", "content": "I'd like to show off how chat templating works!"},
... ]
>>> tokenizer.apply_chat_template(chat, tokenize=False)
"<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
```
Notice how the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of
user messages (but not assistant messages!), and the entire chat is condensed into a single string.
If we use `tokenize=True`, which is the default setting, that string will also be tokenized for us.
Now, try the same code, but swap in the `HuggingFaceH4/zephyr-7b-beta` model instead, and you should get:
```text
<|user|>
Hello, how are you?</s>
<|assistant|>
I'm doing great. How can I help you today?</s>
<|user|>
I'd like to show off how chat templating works!</s>
```
Both Zephyr and Mistral-Instruct were fine-tuned from the same base model, `Mistral-7B-v0.1`. However, they were trained
with totally different chat formats. Without chat templates, you would have to write manual formatting code for each
model, and it's very easy to make minor errors that hurt performance! Chat templates handle the details of formatting
for you, allowing you to write universal code that works for any model.
## How do I use chat templates?
As you can see in the example above, chat templates are easy to use. Simply build a list of messages, with `role`
and `content` keys, and then pass it to the [`~PreTrainedTokenizer.apply_chat_template`] or [`~ProcessorMixin.apply_chat_template`] method
depending on what type of model you are using. Once you do that,
you'll get output that's ready to go! When using chat templates as input for model generation, it's also a good idea
to use `add_generation_prompt=True` to add a [generation prompt](#what-are-generation-prompts).
Here's an example of preparing input for `model.generate()`, using `Zephyr` again:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint) # You may want to use bfloat16 and/or move to GPU here
messages = [
{
"role": "system",
"content": "You are a friendly chatbot who always responds in the style of a pirate",
},
{"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
print(tokenizer.decode(tokenized_chat[0]))
```
This will yield a string in the input format that Zephyr expects.
```text
<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
```
Now that our input is formatted correctly for Zephyr, we can use the model to generate a response to the user's question:
```python
outputs = model.generate(tokenized_chat, max_new_tokens=128)
print(tokenizer.decode(outputs[0]))
```
This will yield:
```text
<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all.
```
Arr, 'twas easy after all!
## Is there an automated pipeline for chat?
Yes, there is! Our text generation pipelines support chat inputs, which makes it easy to use chat models. In the past,
we used to use a dedicated "ConversationalPipeline" class, but this has now been deprecated and its functionality
has been merged into the [`TextGenerationPipeline`]. Let's try the `Zephyr` example again, but this time using
a pipeline:
```python
from transformers import pipeline
pipe = pipeline("text-generation", "HuggingFaceH4/zephyr-7b-beta")
messages = [
{
"role": "system",
"content": "You are a friendly chatbot who always responds in the style of a pirate",
},
{"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
print(pipe(messages, max_new_tokens=128)[0]['generated_text'][-1]) # Print the assistant's response
```
```text
{'role': 'assistant', 'content': "Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all."}
```
The pipeline will take care of all the details of tokenization and calling `apply_chat_template` for you -
once the model has a chat template, all you need to do is initialize the pipeline and pass it the list of messages!
## What are "generation prompts"?
You may have noticed that the `apply_chat_template` method has an `add_generation_prompt` argument. This argument tells
the template to add tokens that indicate the start of a bot response. For example, consider the following chat:
```python
messages = [
{"role": "user", "content": "Hi there!"},
{"role": "assistant", "content": "Nice to meet you!"},
{"role": "user", "content": "Can I ask a question?"}
]
```
Here's what this will look like without a generation prompt, for a model that uses standard "ChatML" formatting:
```python
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
"""<|im_start|>user
Hi there!<|im_end|>
<|im_start|>assistant
Nice to meet you!<|im_end|>
<|im_start|>user
Can I ask a question?<|im_end|>
"""
```
And here's what it looks like **with** a generation prompt:
```python
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
"""<|im_start|>user
Hi there!<|im_end|>
<|im_start|>assistant
Nice to meet you!<|im_end|>
<|im_start|>user
Can I ask a question?<|im_end|>
<|im_start|>assistant
"""
```
Note that this time, we've added the tokens that indicate the start of a bot response. This ensures that when the model
generates text it will write a bot response instead of doing something unexpected, like continuing the user's
message. Remember, chat models are still just language models - they're trained to continue text, and chat is just a
special kind of text to them! You need to guide them with appropriate control tokens, so they know what they're
supposed to be doing.
Not all models require generation prompts. Some models, like LLaMA, don't have any
special tokens before bot responses. In these cases, the `add_generation_prompt` argument will have no effect. The exact
effect that `add_generation_prompt` has will depend on the template being used.
## What does "continue_final_message" do?
When passing a list of messages to `apply_chat_template` or `TextGenerationPipeline`, you can choose
to format the chat so the model will continue the final message in the chat instead of starting a new one. This is done
by removing any end-of-sequence tokens that indicate the end of the final message, so that the model will simply
extend the final message when it begins to generate text. This is useful for "prefilling" the model's response.
Here's an example:
```python
chat = [
{"role": "user", "content": "Can you format the answer in JSON?"},
{"role": "assistant", "content": '{"name": "'},
]
formatted_chat = tokenizer.apply_chat_template(chat, tokenize=True, return_dict=True, continue_final_message=True)
model.generate(**formatted_chat)
```
The model will generate text that continues the JSON string, rather than starting a new message. This approach
can be very useful for improving the accuracy of the model's instruction-following when you know how you want
it to start its replies.
Because `add_generation_prompt` adds the tokens that start a new message, and `continue_final_message` removes any
end-of-message tokens from the final message, it does not make sense to use them together. As a result, you'll
get an error if you try!
<Tip>
The default behaviour of `TextGenerationPipeline` is to set `add_generation_prompt=True` so that it starts a new
message. However, if the final message in the input chat has the "assistant" role, it will assume that this message is
a prefill and switch to `continue_final_message=True` instead, because most models do not support multiple
consecutive assistant messages. You can override this behaviour by explicitly passing the `continue_final_message`
argument when calling the pipeline.
</Tip>
## Can I use chat templates in training?
Yes! This is a good way to ensure that the chat template matches the tokens the model sees during training.
We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you
can simply continue like any other language model training task. When training, you should usually set
`add_generation_prompt=False`, because the added tokens to prompt an assistant response will not be helpful during
training. Let's see an example:
```python
from transformers import AutoTokenizer
from datasets import Dataset
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
chat1 = [
{"role": "user", "content": "Which is bigger, the moon or the sun?"},
{"role": "assistant", "content": "The sun."}
]
chat2 = [
{"role": "user", "content": "Which is bigger, a virus or a bacterium?"},
{"role": "assistant", "content": "A bacterium."}
]
dataset = Dataset.from_dict({"chat": [chat1, chat2]})
dataset = dataset.map(lambda x: {"formatted_chat": tokenizer.apply_chat_template(x["chat"], tokenize=False, add_generation_prompt=False)})
print(dataset['formatted_chat'][0])
```
And we get:
```text
<|user|>
Which is bigger, the moon or the sun?</s>
<|assistant|>
The sun.</s>
```
From here, just continue training like you would with a standard language modelling task, using the `formatted_chat` column.
<Tip>
By default, some tokenizers add special tokens like `<bos>` and `<eos>` to text they tokenize. Chat templates should
already include all the special tokens they need, and so additional special tokens will often be incorrect or
duplicated, which will hurt model performance.
Therefore, if you format text with `apply_chat_template(tokenize=False)`, you should set the argument
`add_special_tokens=False` when you tokenize that text later. If you use `apply_chat_template(tokenize=True)`, you don't need to worry about this!
</Tip>

View File

@ -1,289 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Multimodal Chat Templates for Vision and Audio LLMs
In this section, we'll explore how to use chat templates with multimodal models, enabling your templates to handle a variety of inputs such as text, images, and audio. Multimodal models provide richer, more interactive experiences, and understanding how to effectively combine these inputs within your templates is key. Well walk through how to work with different modalities, configure your templates for optimal performance, and tackle common challenges along the way.
Just like with text-only LLMs, multimodal models expect a chat with **messages**, each of which includes a **role** and **content**. However, for multimodal models, chat templates are a part of the [Processor](./main_cllasses/processors) class. Let's see how we can format our prompts when there are images or videos in the input along with text.
## Image inputs
For models such as [LLaVA](https://huggingface.co/llava-hf) the prompts can be formatted as below. Notice that the only difference from text-only models is that we need to also pass a placeholder for input images. To accommodate for extra modalities, each **content** is a list containing either a text or an image **type**.
Let's make this concrete with a quick example using the `llava-hf/llava-onevision-qwen2-0.5b-ov-hf` model:
```python
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
model_id = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
processor = AutoProcessor.from_pretrained(model_id)
messages = [
{
"role": "system",
"content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
},
{
"role": "user",
"content": [
{"type": "image"},
{"type": "text", "text": "What are these?"},
],
},
]
formatted_prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(formatted_prompt)
```
This yields a string in LLaVA's expected input format with many `<image>` tokens prepended before the text.
```text
'<|im_start|>system
<|im_start|>system
You are a friendly chatbot who always responds in the style of a pirate<|im_end|><|im_start|>user <image>
What are these?<|im_end|>
```
### Image paths or URLs
To incorporate images into your chat templates, you can pass them as file paths or URLs. This method automatically loads the image, processes it, and prepares the necessary pixel values to create ready-to-use inputs for the model. This approach simplifies the integration of images, enabling seamless multimodal functionality.
Let's see how it works with an example using the same model as above. This time we'll indicate an image URL with `"url"` key in the message's **content** and ask the chat template to `tokenize` and `return_dict`. Currently, "base64", "url", and "path" are supported image sources.
```python
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
model_id = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
model = LlavaOnevisionForConditionalGeneration.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)
messages = [
{
"role": "system",
"content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
},
{
"role": "user",
"content": [
{"type": "image", "url": "http://images.cocodataset.org/val2017/000000039769.jpg"},
{"type": "text", "text": "What are these?"},
],
},
]
processed_chat = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt")
print(processed_chat.keys())
```
This yields a dictionary with inputs processed and ready to be further passed into [`~GenerationMixin.generate`] to generate text.
```text
dict_keys(["input_ids", "attention_mask", "pixel_values", "image_sizes"])
```
## Video inputs
Some vision models support videos as inputs as well as images. The message format is very similar to the image-only models with tiny differences to handle loading videos from a URL. We can continue using the same model as before since it supports videos.
### Sampling with fixed number of frames
Here's an example of how to set up a conversation with video inputs. Notice the extra `kwargs` passed to `processor.apply_chat_template()`. The key parameter here is `num_frames`, which controls how many frames to sample uniformly from the video. Each model checkpoint has a maximum frame count it was trained with, and exceeding this limit can significantly impact generation quality. So, its important to choose a frame count that fits both the model's capacity and your computational resources. If you don't specify `num_frames`, the entire video will be loaded without any frame sampling.
You also have the option to choose a specific framework to load the video, depending on your preferences or needs. Currently, we support `decord`, `pyav` (the default), `opencv`, and `torchvision`. For this example, well use `decord`, as it's a bit faster than `pyav`.
<Tip>
Note that if you are trying to load a video from URL, you can decode the video only with `pyav` or `decord` as backend.
</Tip>
```python
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
model_id = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
model = LlavaOnevisionForConditionalGeneration.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)
messages = [
{
"role": "system",
"content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
},
{
"role": "user",
"content": [
{"type": "video", "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/720/Big_Buck_Bunny_720_10s_10MB.mp4"},
{"type": "text", "text": "What do you see in this video?"},
],
},
]
processed_chat = processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
num_frames=32,
video_load_backend="decord",
)
print(processed_chat.keys())
```
### Sampling with FPS
When working with long videos, you might want to sample more frames for better representation. Instead of a fixed number of frames, you can specify `video_fps`, which determines how many frames per second to extract. For example, if a video is **10 seconds long** and you set `video_fps=2`, the model will sample **20 frames** (2 per second, uniformly spaced).
Using the above model, we need to apply chat template as follows to sample 2 frames per second.
```python
processed_chat = processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
video_fps=32,
video_load_backend="decord",
)
print(processed_chat.keys())
```
### Custom Frame Sampling with a Function
Not all models sample frames **uniformly** — some require more complex logic to determine which frames to use. If your model follows a different sampling strategy, you can **customize** frame selection by providing a function:
🔹 Use the `sample_indices_fn` argument to pass a **callable function** for sampling.
🔹 If provided, this function **overrides** standard `num_frames` and `fps` methods.
🔹 It receives all the arguments passed to `load_video` and must return **valid frame indices** to sample.
You should use `sample_indices_fn` when:
- If you need a custom sampling strategy (e.g., **adaptive frame selection** instead of uniform sampling).
- If your model prioritizes **key moments** in a video rather than evenly spaced frames.
Heres an example of how to implement it:
```python
def sample_indices_fn(metadata, **kwargs):
# samples only the first and the second frame
return [0, 1]
processed_chat = processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
sample_indices_fn=sample_indices_fn,
video_load_backend="decord",
)
print(processed_chat.keys())
```
By using `sample_indices_fn`, you gain **full control** over frame selection, making your model **more adaptable** to different video scenarios. 🚀
### List of image frames as video
Sometimes, instead of having a full video file, you might only have a set of sampled frames stored as images.
You can pass a list of image file paths, and the processor will automatically concatenate them into a video. Just make sure that all images have the same size, as they are assumed to be from the same video.
```python
frames_paths = ["/path/to/frame0.png", "/path/to/frame5.png", "/path/to/frame10.png"]
messages = [
{
"role": "system",
"content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
},
{
"role": "user",
"content": [
{"type": "video", "path": frames_paths},
{"type": "text", "text": "What do you see in this video?"},
],
},
]
processed_chat = processor.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
)
print(processed_chat.keys())
```
## Multimodal conversational pipeline
[`ImageTextToTextPipeline`] currently accepts images as inputs but we are planning to add support for video inputs in the future. The pipeline supports chat inputs in the same format as we have seen above. Apart from that, the pipeline will accept chats in OpenAI format. This format is supported exclusively within the pipeline to make inference easier and more accessible.
Here is how the OpenAI conversation format looks:
```python
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What is in this image?",
},
{
"type": "image_url",
"image_url": {"url": f"http://images.cocodataset.org/val2017/000000039769.jpg"},
},
],
}
]
```
## Best Practices for Multimodal Template Configuration
To add a custom chat template for your multimodal LLM, simply create your template using [Jinja](https://jinja.palletsprojects.com/en/3.1.x/templates/) and set it with `processor.chat_template`. If you're new to writing chat templates or need some tips, check out our [tutorial here](./chat_template_advanced) for helpful guidance.
In some cases, you may want your template to handle a **list of content** from multiple modalities, while still supporting a plain string for text-only inference. Here's an example of how you can achieve that, using the [Llama-Vision](https://huggingface.co/collections/meta-llama/metas-llama-32-multimodal-models-675bfd70e574a62dd0e4059b) chat template.
```
{% for message in messages %}
{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}
{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
{% if message['content'] is string %}
{{ message['content'] }}
{% else %}
{% for content in message['content'] %}
{% if content['type'] == 'image' %}
{{ '<|image|>' }}
{% elif content['type'] == 'text' %}
{{ content['text'] }}
{% endif %}
{% endfor %}
{% endif %}
{{ '<|eot_id|>' }}
{% endfor %}
{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}
```

View File

@ -1,410 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Expanding Chat Templates with Tools and Documents
The only argument that `apply_chat_template` requires is `messages`. However, you can pass any keyword
argument to `apply_chat_template` and it will be accessible inside the template. This gives you a lot of freedom to use
chat templates for many things. There are no restrictions on the names or the format of these arguments - you can pass
strings, lists, dicts or whatever else you want.
That said, there are some common use-cases for these extra arguments,
such as passing tools for function calling, or documents for retrieval-augmented generation. In these common cases,
we have some opinionated recommendations about what the names and formats of these arguments should be, which are
described in the sections below. We encourage model authors to make their chat templates compatible with this format,
to make it easy to transfer tool-calling code between models.
## Tool use / function calling
"Tool use" LLMs can choose to call functions as external tools before generating an answer. When passing tools
to a tool-use model, you can simply pass a list of functions to the `tools` argument:
```python
import datetime
def current_time():
"""Get the current local time as a string."""
return str(datetime.now())
def multiply(a: float, b: float):
"""
A function that multiplies two numbers
Args:
a: The first number to multiply
b: The second number to multiply
"""
return a * b
tools = [current_time, multiply]
model_input = tokenizer.apply_chat_template(
messages,
tools=tools
)
```
In order for this to work correctly, you should write your functions in the format above, so that they can be parsed
correctly as tools. Specifically, you should follow these rules:
- The function should have a descriptive name
- Every argument must have a type hint
- The function must have a docstring in the standard Google style (in other words, an initial function description
followed by an `Args:` block that describes the arguments, unless the function does not have any arguments.)
- Do not include types in the `Args:` block. In other words, write `a: The first number to multiply`, not
`a (int): The first number to multiply`. Type hints should go in the function header instead.
- The function can have a return type and a `Returns:` block in the docstring. However, these are optional
because most tool-use models ignore them.
### Passing tool results to the model
The sample code above is enough to list the available tools for your model, but what happens if it wants to actually use
one? If that happens, you should:
1. Parse the model's output to get the tool name(s) and arguments.
2. Add the model's tool call(s) to the conversation.
3. Call the corresponding function(s) with those arguments.
4. Add the result(s) to the conversation
### A complete tool use example
Let's walk through a tool use example, step by step. For this example, we will use an 8B `Hermes-2-Pro` model,
as it is one of the highest-performing tool-use models in its size category at the time of writing. If you have the
memory, you can consider using a larger model instead like [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
or [Mixtral-8x22B](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1), both of which also support tool use
and offer even stronger performance.
First, let's load our model and tokenizer:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "NousResearch/Hermes-2-Pro-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto")
```
Next, let's define a list of tools:
```python
def get_current_temperature(location: str, unit: str) -> float:
"""
Get the current temperature at a location.
Args:
location: The location to get the temperature for, in the format "City, Country"
unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"])
Returns:
The current temperature at the specified location in the specified units, as a float.
"""
return 22. # A real function should probably actually get the temperature!
def get_current_wind_speed(location: str) -> float:
"""
Get the current wind speed in km/h at a given location.
Args:
location: The location to get the temperature for, in the format "City, Country"
Returns:
The current wind speed at the given location in km/h, as a float.
"""
return 6. # A real function should probably actually get the wind speed!
tools = [get_current_temperature, get_current_wind_speed]
```
Now, let's set up a conversation for our bot:
```python
messages = [
{"role": "system", "content": "You are a bot that responds to weather queries. You should reply with the unit used in the queried location."},
{"role": "user", "content": "Hey, what's the temperature in Paris right now?"}
]
```
Now, let's apply the chat template and generate a response:
```python
inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
inputs = {k: v.to(model.device) for k, v in inputs.items()}
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):]))
```
And we get:
```text
<tool_call>
{"arguments": {"location": "Paris, France", "unit": "celsius"}, "name": "get_current_temperature"}
</tool_call><|im_end|>
```
The model has called the function with valid arguments, in the format requested by the function docstring. It has
inferred that we're most likely referring to the Paris in France, and it remembered that, as the home of SI units,
the temperature in France should certainly be displayed in Celsius.
<Tip>
The output format above is specific to the `Hermes-2-Pro` model we're using in this example. Other models may emit different
tool call formats, and you may need to do some manual parsing at this step. For example, `Llama-3.1` models will emit
slightly different JSON, with `parameters` instead of `arguments`. Regardless of the format the model outputs, you
should add the tool call to the conversation in the format below, with `tool_calls`, `function` and `arguments` keys.
</Tip>
Next, let's append the model's tool call to the conversation.
```python
tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}}
messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]})
```
<Tip warning={true}>
If you're familiar with the OpenAI API, you should pay attention to an important difference here - the `tool_call` is
a dict, but in the OpenAI API it's a JSON string. Passing a string may cause errors or strange model behaviour!
</Tip>
Now that we've added the tool call to the conversation, we can call the function and append the result to the
conversation. Since we're just using a dummy function for this example that always returns 22.0, we can just append
that result directly.
```python
messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
```
<Tip>
Some model architectures, notably Mistral/Mixtral, also require a `tool_call_id` here, which should be
9 randomly-generated alphanumeric characters, and assigned to the `id` key of the tool call
dictionary. The same key should also be assigned to the `tool_call_id` key of the tool response dictionary below, so
that tool calls can be matched to tool responses. So, for Mistral/Mixtral models, the code above would be:
```python
tool_call_id = "9Ae3bDc2F" # Random ID, 9 alphanumeric characters
tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}}
messages.append({"role": "assistant", "tool_calls": [{"type": "function", "id": tool_call_id, "function": tool_call}]})
```
and
```python
messages.append({"role": "tool", "tool_call_id": tool_call_id, "name": "get_current_temperature", "content": "22.0"})
```
</Tip>
Finally, let's let the assistant read the function outputs and continue chatting with the user:
```python
inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
inputs = {k: v.to(model.device) for k, v in inputs.items()}
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):]))
```
And we get:
```text
The current temperature in Paris, France is 22.0 ° Celsius.<|im_end|>
```
Although this was a simple demo with dummy tools and a single call, the same technique works with
multiple real tools and longer conversations. This can be a powerful way to extend the capabilities of conversational
agents with real-time information, computational tools like calculators, or access to large databases.
### Understanding tool schemas
Each function you pass to the `tools` argument of `apply_chat_template` is converted into a
[JSON schema](https://json-schema.org/learn/getting-started-step-by-step). These schemas
are then passed to the model chat template. In other words, tool-use models do not see your functions directly, and they
never see the actual code inside them. What they care about is the function **definitions** and the **arguments** they
need to pass to them - they care about what the tools do and how to use them, not how they work! It is up to you
to read their outputs, detect if they have requested to use a tool, pass their arguments to the tool function, and
return the response in the chat.
Generating JSON schemas to pass to the template should be automatic and invisible as long as your functions
follow the specification above, but if you encounter problems, or you simply want more control over the conversion,
you can handle the conversion manually. Here is an example of a manual schema conversion.
```python
from transformers.utils import get_json_schema
def multiply(a: float, b: float):
"""
A function that multiplies two numbers
Args:
a: The first number to multiply
b: The second number to multiply
"""
return a * b
schema = get_json_schema(multiply)
print(schema)
```
This will yield:
```json
{
"type": "function",
"function": {
"name": "multiply",
"description": "A function that multiplies two numbers",
"parameters": {
"type": "object",
"properties": {
"a": {
"type": "number",
"description": "The first number to multiply"
},
"b": {
"type": "number",
"description": "The second number to multiply"
}
},
"required": ["a", "b"]
}
}
}
```
If you wish, you can edit these schemas, or even write them from scratch yourself without using `get_json_schema` at
all. JSON schemas can be passed directly to the `tools` argument of
`apply_chat_template` - this gives you a lot of power to define precise schemas for more complex functions. Be careful,
though - the more complex your schemas, the more likely the model is to get confused when dealing with them! We
recommend simple function signatures where possible, keeping arguments (and especially complex, nested arguments)
to a minimum.
Here is an example of defining schemas by hand, and passing them directly to `apply_chat_template`:
```python
# A simple function that takes no arguments
current_time = {
"type": "function",
"function": {
"name": "current_time",
"description": "Get the current local time as a string.",
"parameters": {
'type': 'object',
'properties': {}
}
}
}
# A more complete function that takes two numerical arguments
multiply = {
'type': 'function',
'function': {
'name': 'multiply',
'description': 'A function that multiplies two numbers',
'parameters': {
'type': 'object',
'properties': {
'a': {
'type': 'number',
'description': 'The first number to multiply'
},
'b': {
'type': 'number', 'description': 'The second number to multiply'
}
},
'required': ['a', 'b']
}
}
}
model_input = tokenizer.apply_chat_template(
messages,
tools = [current_time, multiply]
)
```
## Retrieval-augmented generation
"Retrieval-augmented generation" or "RAG" LLMs can search a corpus of documents for information before responding
to a query. This allows models to vastly expand their knowledge base beyond their limited context size. Our
recommendation for RAG models is that their template
should accept a `documents` argument. This should be a list of documents, where each "document"
is a single dict with `title` and `contents` keys, both of which are strings. Because this format is much simpler
than the JSON schemas used for tools, no helper functions are necessary.
Here's an example of a RAG template in action:
```python
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the model and tokenizer
model_id = "CohereForAI/c4ai-command-r-v01-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
device = model.device # Get the device the model is loaded on
# Define conversation input
conversation = [
{"role": "user", "content": "What has Man always dreamed of?"}
]
# Define documents for retrieval-based generation
documents = [
{
"title": "The Moon: Our Age-Old Foe",
"text": "Man has always dreamed of destroying the moon. In this essay, I shall..."
},
{
"title": "The Sun: Our Age-Old Friend",
"text": "Although often underappreciated, the sun provides several notable benefits..."
}
]
# Tokenize conversation and documents using a RAG template, returning PyTorch tensors.
input_ids = tokenizer.apply_chat_template(
conversation=conversation,
documents=documents,
chat_template="rag",
tokenize=True,
add_generation_prompt=True,
return_tensors="pt").to(device)
# Generate a response
gen_tokens = model.generate(
input_ids,
max_new_tokens=100,
do_sample=True,
temperature=0.3,
)
# Decode and print the generated text along with generation prompt
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)
```
<Tip>
The `documents` input for retrieval-augmented generation is not widely supported, and many models have chat templates which simply ignore this input.
To verify if a model supports the `documents` input, you can read its model card, or `print(tokenizer.chat_template)` to see if the `documents` key is used anywhere.
One model class that does support it, though, is Cohere's [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-08-2024) and [Command-R+](https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024), through their `rag` chat template. You can see additional examples of grounded generation using this feature in their model cards.
</Tip>

File diff suppressed because it is too large Load Diff

View File

@ -30,7 +30,7 @@ DeepSpeed compiles CUDA C++ code and it can be a potential source of errors when
<Tip>
For any other installation issues, please [open an issue](https://github.com/deepspeedai/DeepSpeed/issues) with the DeepSpeed team.
For any other installation issues, please [open an issue](https://github.com/microsoft/DeepSpeed/issues) with the DeepSpeed team.
</Tip>
@ -89,7 +89,7 @@ sudo ln -s /usr/bin/g++-7 /usr/local/cuda-10.2/bin/g++
If you're still having issues with installing DeepSpeed or if you're building DeepSpeed at run time, you can try to prebuild the DeepSpeed modules before installing them. To make a local build for DeepSpeed:
```bash
git clone https://github.com/deepspeedai/DeepSpeed/
git clone https://github.com/microsoft/DeepSpeed/
cd DeepSpeed
rm -rf build
TORCH_CUDA_ARCH_LIST="8.6" DS_BUILD_CPU_ADAM=1 DS_BUILD_UTILS=1 pip install . \
@ -141,7 +141,7 @@ It is also possible to not specify `TORCH_CUDA_ARCH_LIST` and the build program
For training on multiple machines with the same setup, you'll need to make a binary wheel:
```bash
git clone https://github.com/deepspeedai/DeepSpeed/
git clone https://github.com/microsoft/DeepSpeed/
cd DeepSpeed
rm -rf build
TORCH_CUDA_ARCH_LIST="8.6" DS_BUILD_CPU_ADAM=1 DS_BUILD_UTILS=1 \

View File

@ -28,7 +28,7 @@ This guide will walk you through how to deploy DeepSpeed training, the features
## Installation
DeepSpeed is available to install from PyPI or Transformers (for more detailed installation options, take a look at the DeepSpeed [installation details](https://www.deepspeed.ai/tutorials/advanced-install/) or the GitHub [README](https://github.com/deepspeedai/DeepSpeed#installation)).
DeepSpeed is available to install from PyPI or Transformers (for more detailed installation options, take a look at the DeepSpeed [installation details](https://www.deepspeed.ai/tutorials/advanced-install/) or the GitHub [README](https://github.com/microsoft/deepspeed#installation)).
<Tip>
@ -114,10 +114,10 @@ DeepSpeed works with the [`Trainer`] class by way of a config file containing al
<Tip>
Find a complete list of DeepSpeed configuration options on the [DeepSpeed Configuration JSON](https://www.deepspeed.ai/docs/config-json/) reference. You can also find more practical examples of various DeepSpeed configuration examples on the [DeepSpeedExamples](https://github.com/deepspeedai/DeepSpeedExamples) repository or the main [DeepSpeed](https://github.com/deepspeedai/DeepSpeed) repository. To quickly find specific examples, you can:
Find a complete list of DeepSpeed configuration options on the [DeepSpeed Configuration JSON](https://www.deepspeed.ai/docs/config-json/) reference. You can also find more practical examples of various DeepSpeed configuration examples on the [DeepSpeedExamples](https://github.com/microsoft/DeepSpeedExamples) repository or the main [DeepSpeed](https://github.com/microsoft/DeepSpeed) repository. To quickly find specific examples, you can:
```bash
git clone https://github.com/deepspeedai/DeepSpeedExamples
git clone https://github.com/microsoft/DeepSpeedExamples
cd DeepSpeedExamples
find . -name '*json'
# find examples with the Lamb optimizer
@ -303,7 +303,7 @@ For more information about initializing large models with ZeRO-3 and accessing t
[ZeRO-Infinity](https://hf.co/papers/2104.07857) allows offloading model states to the CPU and/or NVMe to save even more memory. Smart partitioning and tiling algorithms allow each GPU to send and receive very small amounts of data during offloading such that a modern NVMe can fit an even larger total memory pool than is available to your training process. ZeRO-Infinity requires ZeRO-3.
Depending on the CPU and/or NVMe memory available, you can offload both the [optimizer states](https://www.deepspeed.ai/docs/config-json/#optimizer-offloading) and [parameters](https://www.deepspeed.ai/docs/config-json/#parameter-offloading), just one of them, or none. You should also make sure the `nvme_path` is pointing to an NVMe device, because while it still works with a normal hard drive or solid state drive, it'll be significantly slower. With a modern NVMe, you can expect peak transfer speeds of ~3.5GB/s for read and ~3GB/s for write operations. Lastly, [run a benchmark](https://github.com/deepspeedai/DeepSpeed/issues/998) on your training setup to determine the optimal `aio` configuration.
Depending on the CPU and/or NVMe memory available, you can offload both the [optimizer states](https://www.deepspeed.ai/docs/config-json/#optimizer-offloading) and [parameters](https://www.deepspeed.ai/docs/config-json/#parameter-offloading), just one of them, or none. You should also make sure the `nvme_path` is pointing to an NVMe device, because while it still works with a normal hard drive or solid state drive, it'll be significantly slower. With a modern NVMe, you can expect peak transfer speeds of ~3.5GB/s for read and ~3GB/s for write operations. Lastly, [run a benchmark](https://github.com/microsoft/DeepSpeed/issues/998) on your training setup to determine the optimal `aio` configuration.
The example ZeRO-3/Infinity configuration file below sets most of the parameter values to `auto`, but you could also manually add these values.
@ -586,20 +586,6 @@ You can choose the communication data type by setting the `communication_data_ty
}
```
### Universal Checkpointing
[Universal Checkpointing](https://www.deepspeed.ai/tutorials/universal-checkpointing) is an efficient and flexible feature for saving and loading model checkpoints. It enables seamless model training continuation and fine-tuning across different model architectures, parallelism techniques, and training configurations.
Resume training with a universal checkpoint by setting [load_universal](https://www.deepspeed.ai/docs/config-json/#checkpoint-options) to `true` in the config file.
```yaml
{
"checkpoint": {
"load_universal": true
}
}
```
## Deployment
DeepSpeed can be deployed by different launchers such as [torchrun](https://pytorch.org/docs/stable/elastic/run.html), the `deepspeed` launcher, or [Accelerate](https://huggingface.co/docs/accelerate/basic_tutorials/launch#using-accelerate-launch). To deploy, add `--deepspeed ds_config.json` to the [`Trainer`] command line. Its recommended to use DeepSpeeds [`add_config_arguments`](https://deepspeed.readthedocs.io/en/latest/initialize.html#argument-parsing) utility to add any necessary command line arguments to your code.
@ -1157,7 +1143,7 @@ For Transformers>=4.28, if `synced_gpus` is automatically set to `True` if multi
## Troubleshoot
When you encounter an issue, you should consider whether DeepSpeed is the cause of the problem because often it isn't (unless it's super obviously and you can see DeepSpeed modules in the exception)! The first step should be to retry your setup without DeepSpeed, and if the problem persists, then you can report the issue. If the issue is a core DeepSpeed problem and unrelated to the Transformers integration, open an Issue on the [DeepSpeed repository](https://github.com/deepspeedai/DeepSpeed).
When you encounter an issue, you should consider whether DeepSpeed is the cause of the problem because often it isn't (unless it's super obviously and you can see DeepSpeed modules in the exception)! The first step should be to retry your setup without DeepSpeed, and if the problem persists, then you can report the issue. If the issue is a core DeepSpeed problem and unrelated to the Transformers integration, open an Issue on the [DeepSpeed repository](https://github.com/microsoft/DeepSpeed).
For issues related to the Transformers integration, please provide the following information:
@ -1227,7 +1213,7 @@ This means the DeepSpeed loss scaler is unable to find a scaling coefficient to
## Resources
DeepSpeed ZeRO is a powerful technology for training and loading very large models for inference with limited GPU resources, making it more accessible to everyone. To learn more about DeepSpeed, feel free to read the [blog posts](https://www.microsoft.com/en-us/research/search/?q=deepspeed), [documentation](https://www.deepspeed.ai/getting-started/), and [GitHub repository](https://github.com/deepspeedai/DeepSpeed).
DeepSpeed ZeRO is a powerful technology for training and loading very large models for inference with limited GPU resources, making it more accessible to everyone. To learn more about DeepSpeed, feel free to read the [blog posts](https://www.microsoft.com/en-us/research/search/?q=deepspeed), [documentation](https://www.deepspeed.ai/getting-started/), and [GitHub repository](https://github.com/microsoft/deepspeed).
The following papers are also a great resource for learning more about ZeRO:

View File

@ -58,7 +58,7 @@ Otherwise, you can choose a size-based wrapping policy where FSDP is applied to
### Checkpointing
Intermediate checkpoints should be saved with `fsdp_state_dict_type: SHARDED_STATE_DICT` because saving the full state dict with CPU offloading on rank 0 takes a lot of time and often results in `NCCL Timeout` errors due to indefinite hanging during broadcasting. You can resume training with the sharded state dicts with the [`~accelerate.Accelerator.load_state`] method.
Intermediate checkpoints should be saved with `fsdp_state_dict_type: SHARDED_STATE_DICT` because saving the full state dict with CPU offloading on rank 0 takes a lot of time and often results in `NCCL Timeout` errors due to indefinite hanging during broadcasting. You can resume training with the sharded state dicts with the [`~accelerate.Accelerator.load_state`]` method.
```py
# directory containing checkpoints

View File

@ -41,13 +41,6 @@ This guide describes:
* common decoding strategies and their main parameters
* saving and sharing custom generation configurations with your fine-tuned model on 🤗 Hub
<Tip>
`generate()` is a critical component of our [chat CLI](quicktour#chat-with-text-generation-models).
You can apply the learnings of this guide there as well.
</Tip>
## Default text generation configuration
A decoding strategy for a model is defined in its generation configuration. When using pre-trained models for inference
@ -103,12 +96,6 @@ distribution over the entire vocabulary with various strategy-specific adjustmen
the decoding strategies that support multiple sequence candidates, e.g. variations of beam search and sampling. Decoding
strategies like greedy search and contrastive search return a single output sequence.
It is also possible to extend `generate()` with external libraries or handcrafted code. The `logits_processor` argument
allows you to pass custom [`LogitsProcessor`] instances, allowing you to manipulate the next token probability
distributions. Likewise, the `stopping_criteria` argument lets you set custom [`StoppingCriteria`] to stop text generation.
The [`logits-processor-zoo`](https://github.com/NVIDIA/logits-processor-zoo) library contains examples of external
`generate()`-compatible extensions.
## Save a custom decoding strategy with your model
If you would like to share your fine-tuned model with a specific generation configuration, you can:
@ -231,7 +218,7 @@ to check if the text is machine-generated (outputs `True` for machine-generated
>>> detector = WatermarkDetector(model_config=model.config, device="cpu", watermarking_config=watermarking_config)
>>> detection_out = detector(out, return_dict=True)
>>> detection_out.prediction
array([ True, True])
array([True, True])
```
@ -269,7 +256,7 @@ dimension you can act upon, in addition to selecting a decoding strategy. Popula
>>> model = AutoModelForCausalLM.from_pretrained(checkpoint)
>>> outputs = model.generate(**inputs)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['I look forward to seeing you all again!\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n']
['I look forward to seeing you all again!\n\n\n\n\n\n\n\n\n\n\n']
```
### Contrastive search
@ -445,31 +432,9 @@ To enable assisted decoding, set the `assistant_model` argument with a model.
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a glass of wine.']
['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
```
<Tip>
If you're using a `pipeline` object, all you need to do is to pass the assistant checkpoint under `assistant_model`
```python
>>> from transformers import pipeline
>>> import torch
>>> pipe = pipeline(
... "text-generation",
... model="meta-llama/Llama-3.1-8B",
... assistant_model="meta-llama/Llama-3.2-1B", # This extra line is all that's needed, also works with UAD
... torch_dtype=torch.bfloat16
... )
>>> pipe_output = pipe("Once upon a time, ", max_new_tokens=50, do_sample=False)
>>> pipe_output[0]["generated_text"]
'Once upon a time, 3D printing was a niche technology that was only'
```
</Tip>
When using assisted decoding with sampling methods, you can use the `temperature` argument to control the randomness,
just like in multinomial sampling. However, in assisted decoding, reducing the temperature may help improve the latency.
@ -488,7 +453,7 @@ just like in multinomial sampling. However, in assisted decoding, reducing the t
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model, do_sample=True, temperature=0.5)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Alice and Bob are two people who are very different, but they are both very good at what they do. Alice']
['Alice and Bob, a couple of friends of mine, who are both in the same office as']
```
We recommend to install `scikit-learn` library to enhance the candidate generation strategy and achieve additional speedup.
@ -518,7 +483,7 @@ to ensure the new tokens include the correct prompt suffix.
>>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
>>> outputs = model.generate(**inputs, assistant_model=assistant_model, tokenizer=tokenizer, assistant_tokenizer=assistant_tokenizer)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Alice and Bob are playing a game. Alice has a set of $n$ integers $a_1, a']
['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
```
#### Prompt Lookup
@ -547,7 +512,7 @@ If the model you're using was trained to do early exit, you can pass
>>> model = AutoModelForCausalLM.from_pretrained(checkpoint)
>>> outputs = model.generate(**inputs, assistant_early_exit=4, do_sample=False, max_new_tokens=20)
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Alice and Bob are playing a game. Alice has a set of $n$ integers $a_1, a']
['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
```
### DoLa Decoding
@ -571,9 +536,10 @@ See the following examples for DoLa decoding with the 32-layer LLaMA-7B model.
>>> import torch
>>> from accelerate.test_utils.testing import get_backend
>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
>>> tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
>>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16).to(device)
>>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16)
>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
>>> model.to(device)
>>> set_seed(42)
>>> text = "On what date was the Declaration of Independence officially signed?"
@ -592,7 +558,7 @@ See the following examples for DoLa decoding with the 32-layer LLaMA-7B model.
# DoLa decoding with contrasting specific layers (layers 28 and 30)
>>> dola_custom_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers=[28,30], repetition_penalty=1.2)
>>> tokenizer.batch_decode(dola_custom_output[:, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
['\nIn 1891, when he was 54 years old, John Jacob Astor founded his empire. He opened a one-man business and spent the next 27 years working 10-hour days. When']
['\nIt was officially signed on 2 August 1776, when 56 members of the Second Continental Congress, representing the original 13 American colonies, voted unanimously for the resolution for independence. The 2']
```
#### Understanding the `dola_layers` argument

View File

@ -88,7 +88,6 @@ For now the supported model architectures are the architectures that have been v
- T5
- Mamba
- Nemotron
- Gemma2
## Example usage

View File

@ -24,37 +24,7 @@ You'll learn how to:
- Modify a model's architecture by changing its attention mechanism.
- Apply techniques like Low-Rank Adaptation (LoRA) to specific model components.
We encourage you to contribute your own hacks and share them here with the community!
## Efficient Development Workflow
When modifying model code, you'll often need to test your changes without restarting your Python session. The `clear_import_cache()` utility helps with this workflow, especially during model development and contribution when you need to frequently test and compare model outputs:
```python
from transformers import AutoModel
model = AutoModel.from_pretrained("bert-base-uncased")
# Make modifications to the transformers code...
# Clear the cache to reload the modified code
from transformers.utils.import_utils import clear_import_cache
clear_import_cache()
# Reimport to get the changes
from transformers import AutoModel
model = AutoModel.from_pretrained("bert-base-uncased") # Will use updated code
```
This is particularly useful when:
- Iteratively modifying model architectures
- Debugging model implementations
- Testing changes during model development
- Comparing outputs between original and modified versions
- Working on model contributions
The `clear_import_cache()` function removes all cached Transformers modules and allows Python to reload the modified code. This enables rapid development cycles without constantly restarting your environment.
This workflow is especially valuable when implementing new models, where you need to frequently compare outputs between the original implementation and your Transformers version (as described in the [Add New Model](https://huggingface.co/docs/transformers/add_new_model) guide).
We encourage you to contribute your own hacks and share them here with the community1
## Example: Modifying the Attention Mechanism in the Segment Anything Model (SAM)

View File

@ -62,11 +62,8 @@ Flax), PyTorch, and/or TensorFlow.
| [ALBERT](model_doc/albert) | ✅ | ✅ | ✅ |
| [ALIGN](model_doc/align) | ✅ | ❌ | ❌ |
| [AltCLIP](model_doc/altclip) | ✅ | ❌ | ❌ |
| [Aria](model_doc/aria) | ✅ | ❌ | ❌ |
| [AriaText](model_doc/aria_text) | ✅ | ❌ | ❌ |
| [Audio Spectrogram Transformer](model_doc/audio-spectrogram-transformer) | ✅ | ❌ | ❌ |
| [Autoformer](model_doc/autoformer) | ✅ | ❌ | ❌ |
| [Bamba](model_doc/bamba) | ✅ | ❌ | ❌ |
| [Bark](model_doc/bark) | ✅ | ❌ | ❌ |
| [BART](model_doc/bart) | ✅ | ✅ | ✅ |
| [BARThez](model_doc/barthez) | ✅ | ✅ | ✅ |
@ -100,8 +97,6 @@ Flax), PyTorch, and/or TensorFlow.
| [CodeGen](model_doc/codegen) | ✅ | ❌ | ❌ |
| [CodeLlama](model_doc/code_llama) | ✅ | ❌ | ✅ |
| [Cohere](model_doc/cohere) | ✅ | ❌ | ❌ |
| [Cohere2](model_doc/cohere2) | ✅ | ❌ | ❌ |
| [ColPali](model_doc/colpali) | ✅ | ❌ | ❌ |
| [Conditional DETR](model_doc/conditional_detr) | ✅ | ❌ | ❌ |
| [ConvBERT](model_doc/convbert) | ✅ | ✅ | ❌ |
| [ConvNeXT](model_doc/convnext) | ✅ | ✅ | ❌ |
@ -110,7 +105,6 @@ Flax), PyTorch, and/or TensorFlow.
| [CPM-Ant](model_doc/cpmant) | ✅ | ❌ | ❌ |
| [CTRL](model_doc/ctrl) | ✅ | ✅ | ❌ |
| [CvT](model_doc/cvt) | ✅ | ✅ | ❌ |
| [DAB-DETR](model_doc/dab-detr) | ✅ | ❌ | ❌ |
| [DAC](model_doc/dac) | ✅ | ❌ | ❌ |
| [Data2VecAudio](model_doc/data2vec) | ✅ | ❌ | ❌ |
| [Data2VecText](model_doc/data2vec) | ✅ | ❌ | ❌ |
@ -123,14 +117,11 @@ Flax), PyTorch, and/or TensorFlow.
| [DeiT](model_doc/deit) | ✅ | ✅ | ❌ |
| [DePlot](model_doc/deplot) | ✅ | ❌ | ❌ |
| [Depth Anything](model_doc/depth_anything) | ✅ | ❌ | ❌ |
| [DepthPro](model_doc/depth_pro) | ✅ | ❌ | ❌ |
| [DETA](model_doc/deta) | ✅ | ❌ | ❌ |
| [DETR](model_doc/detr) | ✅ | ❌ | ❌ |
| [DialoGPT](model_doc/dialogpt) | ✅ | ✅ | ✅ |
| [DiffLlama](model_doc/diffllama) | ✅ | ❌ | ❌ |
| [DiNAT](model_doc/dinat) | ✅ | ❌ | ❌ |
| [DINOv2](model_doc/dinov2) | ✅ | ❌ | ✅ |
| [DINOv2 with Registers](model_doc/dinov2_with_registers) | ✅ | ❌ | ❌ |
| [DistilBERT](model_doc/distilbert) | ✅ | ✅ | ✅ |
| [DiT](model_doc/dit) | ✅ | ❌ | ✅ |
| [DonutSwin](model_doc/donut) | ✅ | ❌ | ❌ |
@ -139,7 +130,6 @@ Flax), PyTorch, and/or TensorFlow.
| [EfficientFormer](model_doc/efficientformer) | ✅ | ✅ | ❌ |
| [EfficientNet](model_doc/efficientnet) | ✅ | ❌ | ❌ |
| [ELECTRA](model_doc/electra) | ✅ | ✅ | ✅ |
| [Emu3](model_doc/emu3) | ✅ | ❌ | ❌ |
| [EnCodec](model_doc/encodec) | ✅ | ❌ | ❌ |
| [Encoder decoder](model_doc/encoder-decoder) | ✅ | ✅ | ✅ |
| [ERNIE](model_doc/ernie) | ✅ | ❌ | ❌ |
@ -147,7 +137,6 @@ Flax), PyTorch, and/or TensorFlow.
| [ESM](model_doc/esm) | ✅ | ✅ | ❌ |
| [FairSeq Machine-Translation](model_doc/fsmt) | ✅ | ❌ | ❌ |
| [Falcon](model_doc/falcon) | ✅ | ❌ | ❌ |
| [Falcon3](model_doc/falcon3) | ✅ | ❌ | ✅ |
| [FalconMamba](model_doc/falcon_mamba) | ✅ | ❌ | ❌ |
| [FastSpeech2Conformer](model_doc/fastspeech2_conformer) | ✅ | ❌ | ❌ |
| [FLAN-T5](model_doc/flan-t5) | ✅ | ✅ | ✅ |
@ -163,7 +152,6 @@ Flax), PyTorch, and/or TensorFlow.
| [GIT](model_doc/git) | ✅ | ❌ | ❌ |
| [GLM](model_doc/glm) | ✅ | ❌ | ❌ |
| [GLPN](model_doc/glpn) | ✅ | ❌ | ❌ |
| [GOT-OCR2](model_doc/got_ocr2) | ✅ | ❌ | ❌ |
| [GPT Neo](model_doc/gpt_neo) | ✅ | ❌ | ✅ |
| [GPT NeoX](model_doc/gpt_neox) | ✅ | ❌ | ❌ |
| [GPT NeoX Japanese](model_doc/gpt_neox_japanese) | ✅ | ❌ | ❌ |
@ -173,11 +161,9 @@ Flax), PyTorch, and/or TensorFlow.
| [GPTSAN-japanese](model_doc/gptsan-japanese) | ✅ | ❌ | ❌ |
| [Granite](model_doc/granite) | ✅ | ❌ | ❌ |
| [GraniteMoeMoe](model_doc/granitemoe) | ✅ | ❌ | ❌ |
| [GraniteMoeSharedMoe](model_doc/granitemoeshared) | ✅ | ❌ | ❌ |
| [Graphormer](model_doc/graphormer) | ✅ | ❌ | ❌ |
| [Grounding DINO](model_doc/grounding-dino) | ✅ | ❌ | ❌ |
| [GroupViT](model_doc/groupvit) | ✅ | ✅ | ❌ |
| [Helium](model_doc/helium) | ✅ | ❌ | ❌ |
| [HerBERT](model_doc/herbert) | ✅ | ✅ | ✅ |
| [Hiera](model_doc/hiera) | ✅ | ❌ | ❌ |
| [Hubert](model_doc/hubert) | ✅ | ✅ | ❌ |
@ -186,7 +172,6 @@ Flax), PyTorch, and/or TensorFlow.
| [IDEFICS](model_doc/idefics) | ✅ | ✅ | ❌ |
| [Idefics2](model_doc/idefics2) | ✅ | ❌ | ❌ |
| [Idefics3](model_doc/idefics3) | ✅ | ❌ | ❌ |
| [Idefics3VisionTransformer](model_doc/idefics3_vision) | ❌ | ❌ | ❌ |
| [ImageGPT](model_doc/imagegpt) | ✅ | ❌ | ❌ |
| [Informer](model_doc/informer) | ✅ | ❌ | ❌ |
| [InstructBLIP](model_doc/instructblip) | ✅ | ❌ | ❌ |
@ -240,8 +225,6 @@ Flax), PyTorch, and/or TensorFlow.
| [MobileNetV2](model_doc/mobilenet_v2) | ✅ | ❌ | ❌ |
| [MobileViT](model_doc/mobilevit) | ✅ | ✅ | ❌ |
| [MobileViTV2](model_doc/mobilevitv2) | ✅ | ❌ | ❌ |
| [ModernBERT](model_doc/modernbert) | ✅ | ❌ | ❌ |
| [Moonshine](model_doc/moonshine) | ✅ | ❌ | ❌ |
| [Moshi](model_doc/moshi) | ✅ | ❌ | ❌ |
| [MPNet](model_doc/mpnet) | ✅ | ✅ | ❌ |
| [MPT](model_doc/mpt) | ✅ | ❌ | ❌ |
@ -289,7 +272,6 @@ Flax), PyTorch, and/or TensorFlow.
| [PVTv2](model_doc/pvt_v2) | ✅ | ❌ | ❌ |
| [QDQBert](model_doc/qdqbert) | ✅ | ❌ | ❌ |
| [Qwen2](model_doc/qwen2) | ✅ | ❌ | ❌ |
| [Qwen2_5_VL](model_doc/qwen2_5_vl) | ✅ | ❌ | ❌ |
| [Qwen2Audio](model_doc/qwen2_audio) | ✅ | ❌ | ❌ |
| [Qwen2MoE](model_doc/qwen2_moe) | ✅ | ❌ | ❌ |
| [Qwen2VL](model_doc/qwen2_vl) | ✅ | ❌ | ❌ |
@ -307,7 +289,6 @@ Flax), PyTorch, and/or TensorFlow.
| [RoFormer](model_doc/roformer) | ✅ | ✅ | ✅ |
| [RT-DETR](model_doc/rt_detr) | ✅ | ❌ | ❌ |
| [RT-DETR-ResNet](model_doc/rt_detr_resnet) | ✅ | ❌ | ❌ |
| [RT-DETRv2](model_doc/rt_detr_v2) | ✅ | ❌ | ❌ |
| [RWKV](model_doc/rwkv) | ✅ | ❌ | ❌ |
| [SAM](model_doc/sam) | ✅ | ✅ | ❌ |
| [SeamlessM4T](model_doc/seamless_m4t) | ✅ | ❌ | ❌ |
@ -324,7 +305,6 @@ Flax), PyTorch, and/or TensorFlow.
| [SqueezeBERT](model_doc/squeezebert) | ✅ | ❌ | ❌ |
| [StableLm](model_doc/stablelm) | ✅ | ❌ | ❌ |
| [Starcoder2](model_doc/starcoder2) | ✅ | ❌ | ❌ |
| [SuperGlue](model_doc/superglue) | ✅ | ❌ | ❌ |
| [SuperPoint](model_doc/superpoint) | ✅ | ❌ | ❌ |
| [SwiftFormer](model_doc/swiftformer) | ✅ | ✅ | ❌ |
| [Swin Transformer](model_doc/swin) | ✅ | ✅ | ❌ |
@ -336,10 +316,8 @@ Flax), PyTorch, and/or TensorFlow.
| [Table Transformer](model_doc/table-transformer) | ✅ | ❌ | ❌ |
| [TAPAS](model_doc/tapas) | ✅ | ✅ | ❌ |
| [TAPEX](model_doc/tapex) | ✅ | ✅ | ✅ |
| [TextNet](model_doc/textnet) | ✅ | ❌ | ❌ |
| [Time Series Transformer](model_doc/time_series_transformer) | ✅ | ❌ | ❌ |
| [TimeSformer](model_doc/timesformer) | ✅ | ❌ | ❌ |
| [TimmWrapperModel](model_doc/timm_wrapper) | ✅ | ❌ | ❌ |
| [Trajectory Transformer](model_doc/trajectory_transformer) | ✅ | ❌ | ❌ |
| [Transformer-XL](model_doc/transfo-xl) | ✅ | ✅ | ❌ |
| [TrOCR](model_doc/trocr) | ✅ | ❌ | ❌ |
@ -366,8 +344,6 @@ Flax), PyTorch, and/or TensorFlow.
| [ViTMAE](model_doc/vit_mae) | ✅ | ✅ | ❌ |
| [ViTMatte](model_doc/vitmatte) | ✅ | ❌ | ❌ |
| [ViTMSN](model_doc/vit_msn) | ✅ | ❌ | ❌ |
| [ViTPose](model_doc/vitpose) | ✅ | ❌ | ❌ |
| [ViTPoseBackbone](model_doc/vitpose_backbone) | ✅ | ❌ | ❌ |
| [VITS](model_doc/vits) | ✅ | ❌ | ❌ |
| [ViViT](model_doc/vivit) | ✅ | ❌ | ❌ |
| [Wav2Vec2](model_doc/wav2vec2) | ✅ | ✅ | ✅ |
@ -390,7 +366,6 @@ Flax), PyTorch, and/or TensorFlow.
| [YOLOS](model_doc/yolos) | ✅ | ❌ | ❌ |
| [YOSO](model_doc/yoso) | ✅ | ❌ | ❌ |
| [Zamba](model_doc/zamba) | ✅ | ❌ | ❌ |
| [Zamba2](model_doc/zamba2) | ✅ | ❌ | ❌ |
| [ZoeDepth](model_doc/zoedepth) | ✅ | ❌ | ❌ |
<!-- End table-->

View File

@ -32,40 +32,29 @@ Install 🤗 Transformers for whichever deep learning library you're working wit
You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies.
Create a virtual environment with [uv](https://docs.astral.sh/uv/) (refer to [Installation](https://docs.astral.sh/uv/getting-started/installation/) for installation instructions), a fast Rust-based Python package and project manager.
Start by creating a virtual environment in your project directory:
```bash
uv venv my-env
source my-env/bin/activate
python -m venv .env
```
Now you're ready to install 🤗 Transformers with pip or uv.
<hfoptions id="install">
<hfoption id="uv">
Activate the virtual environment. On Linux and MacOs:
```bash
uv pip install transformers
source .env/bin/activate
```
Activate Virtual environment on Windows
```bash
.env/Scripts/activate
```
</hfoption>
<hfoption id="pip">
Now you're ready to install 🤗 Transformers with the following command:
```bash
pip install transformers
```
</hfoption>
</hfoptions>
For GPU acceleration, install the appropriate CUDA drivers for [PyTorch](https://pytorch.org/get-started/locally) and TensorFlow(https://www.tensorflow.org/install/pip).
Run the command below to check if your system detects an NVIDIA GPU.
```bash
nvidia-smi
```
For CPU-support only, you can conveniently install 🤗 Transformers and a deep learning library in one line. For example, install 🤗 Transformers and PyTorch with:
```bash
@ -265,36 +254,3 @@ Once your file is downloaded and locally cached, specify it's local path to load
See the [How to download files from the Hub](https://huggingface.co/docs/hub/how-to-downstream) section for more details on downloading files stored on the Hub.
</Tip>
## Troubleshooting
See below for some of the more common installation issues and how to resolve them.
### Unsupported Python version
Ensure you are using Python 3.9 or later. Run the command below to check your Python version.
```
python --version
```
### Missing dependencies
Install all required dependencies by running the following command. Ensure youre in the project directory before executing the command.
```
pip install -r requirements.txt
```
### Windows-specific
If you encounter issues on Windows, you may need to activate Developer Mode. Navigate to Windows Settings > For Developers > Developer Mode.
Alternatively, create and activate a virtual environment as shown below.
```
python -m venv env
.\env\Scripts\activate
```

View File

@ -352,8 +352,6 @@ A [`Constraint`] can be used to force the generation to include specific tokens
[[autodoc]] TextIteratorStreamer
[[autodoc]] AsyncTextIteratorStreamer
## Caches
[[autodoc]] Cache

View File

@ -56,16 +56,16 @@ More concretely, key-value cache acts as a memory bank for these generative mode
>>> import torch
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache
>>> model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
>>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
>>> model_id = "meta-llama/Llama-2-7b-chat-hf"
>>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda:0")
>>> tokenizer = AutoTokenizer.from_pretrained(model_id)
>>> past_key_values = DynamicCache()
>>> messages = [{"role": "user", "content": "Hello, what's your name."}]
>>> inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
>>> inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to("cuda:0")
>>> generated_ids = inputs.input_ids
>>> cache_position = torch.arange(inputs.input_ids.shape[1], dtype=torch.int64, device=model.device)
>>> cache_position = torch.arange(inputs.input_ids.shape[1], dtype=torch.int64, device="cuda:0")
>>> max_new_tokens = 10
>>> for _ in range(max_new_tokens):
@ -82,13 +82,7 @@ More concretely, key-value cache acts as a memory bank for these generative mode
... cache_position = cache_position[-1:] + 1 # add one more position for the next token
>>> print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
```
```txt
<|user|>
Hello, what's your name.
<|assistant|>
My name is Sarah.
<|
"[INST] Hello, what's your name. [/INST] Hello! My name is LLaMA,"
```
</details>
@ -138,13 +132,17 @@ Cache quantization can be detrimental in terms of latency if the context length
>>> import torch
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
>>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
>>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16).to("cuda:0")
>>> inputs = tokenizer("I like rock music because", return_tensors="pt").to(model.device)
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20, cache_implementation="quantized", cache_config={"nbits": 4, "backend": "quanto"})
>>> print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
I like rock music because it's a great way to express myself. I like the way it makes me feel, the
I like rock music because it's loud and energetic. It's a great way to express myself and rel
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20)
>>> print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
I like rock music because it's loud and energetic. I like to listen to it when I'm feeling
```
### Offloaded Cache
@ -168,7 +166,7 @@ Use `cache_implementation="offloaded_static"` for an offloaded static cache (see
>>> ckpt = "microsoft/Phi-3-mini-4k-instruct"
>>> tokenizer = AutoTokenizer.from_pretrained(ckpt)
>>> model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, device_map="auto")
>>> model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to("cuda:0")
>>> inputs = tokenizer("Fun fact: The shortest", return_tensors="pt").to(model.device)
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=23, cache_implementation="offloaded")
@ -233,14 +231,14 @@ For more examples with Static Cache and JIT compilation, take a look at [StaticC
>>> import torch
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
>>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
>>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
>>> inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
>>> # simply pass the cache implementation="static"
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20, cache_implementation="static")
>>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
"Hello, my name is [Your Name] and I am a [Your Position] at [Your Company]. I am writing"
"Hello, my name is [Your Name], and I am a [Your Profession] with [Number of Years] of"
```
@ -258,7 +256,7 @@ This will use the [`~OffloadedStaticCache`] implementation instead.
>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
>>> inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
>>> # simply pass the cache implementation="offloaded_static"
>>> # simply pass the cache implementation="static"
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20, cache_implementation="offloaded_static")
>>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
"Hello, my name is [Your Name], and I am a [Your Profession] with [Number of Years] of"
@ -277,14 +275,14 @@ Note that you can use this cache only for models that support sliding window, e.
>>> import torch
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, SinkCache
>>> tokenizer = AutoTokenizer.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B")
>>> model = AutoModelForCausalLM.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B", torch_dtype=torch.float16, device_map="auto")
>>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
>>> model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16).to("cuda:0")
>>> inputs = tokenizer("Yesterday I was on a rock concert and.", return_tensors="pt").to(model.device)
>>> # can be used by passing in cache implementation
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=30, cache_implementation="sliding_window")
>>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
"Yesterday I was on a rock concert and. I was so excited to see my favorite band perform live. I was so happy that I could hardly contain myself. I was jumping up and down and"
"Yesterday I was on a rock concert and. I was so excited to see my favorite band. I was so excited that I was jumping up and down and screaming. I was so excited that I"
```
### Sink Cache
@ -297,8 +295,8 @@ Unlike other cache classes, this one can't be used directly by indicating a `cac
>>> import torch
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, SinkCache
>>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
>>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16).to("cuda:0")
>>> inputs = tokenizer("This is a long story about unicorns, fairies and magic.", return_tensors="pt").to(model.device)
>>> # get our cache, specify number of sink tokens and window size
@ -306,7 +304,7 @@ Unlike other cache classes, this one can't be used directly by indicating a `cac
>>> past_key_values = SinkCache(window_length=256, num_sink_tokens=4)
>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=30, past_key_values=past_key_values)
>>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
"This is a long story about unicorns, fairies and magic. It is a story about a young girl named Lily who discovers that she has the power to control the elements. She learns that she can"
"This is a long story about unicorns, fairies and magic. It is a fantasy world where unicorns and fairies live together in harmony. The story follows a young girl named Lily"
```
### Encoder-Decoder Cache
@ -334,22 +332,22 @@ In case you are using Sink Cache, you have to crop your inputs to that maximum l
>>> import torch
>>> from transformers import AutoTokenizer,AutoModelForCausalLM
>>> from transformers.cache_utils import (
... DynamicCache,
... SinkCache,
... StaticCache,
... SlidingWindowCache,
... QuantoQuantizedCache,
... QuantizedCacheConfig,
... )
>>> DynamicCache,
>>> SinkCache,
>>> StaticCache,
>>> SlidingWindowCache,
>>> QuantoQuantizedCache,
>>> QuantizedCacheConfig,
>>> )
>>> model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
>>> model_id = "meta-llama/Llama-2-7b-chat-hf"
>>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map='auto')
>>> tokenizer = AutoTokenizer.from_pretrained(model_id)
>>> user_prompts = ["Hello, what's your name?", "Btw, yesterday I was on a rock concert."]
>>> past_key_values = DynamicCache()
>>> max_cache_length = past_key_values.get_max_cache_shape()
>>> max_cache_length = past_key_values.get_max_length()
>>> messages = []
>>> for prompt in user_prompts:
@ -365,7 +363,7 @@ In case you are using Sink Cache, you have to crop your inputs to that maximum l
... messages.append({"role": "assistant", "content": completion})
print(messages)
[{'role': 'user', 'content': "Hello, what's your name?"}, {'role': 'assistant', 'content': "Hello, I'm AI."}, {'role': 'user', 'content': 'Btw, yesterday I was on a rock concert.'}, {'role': 'assistant', 'content': "I'm sorry to hear that you were on a rock concert yesterday. It sounds like a fun experience, but I'm not capable of experiencing music or concerts. However, I can provide you with some information about rock music and its history. Rock music emerged in the 1950s and 1960s in the United States and Britain, and it quickly gained popularity around the world. Some of the most famous rock bands of all time include The Beatles, The Rolling Stones, Led Zeppelin, and Pink Floyd. Rock music has a distinct sound and style, with elements of blues, country, and folk music. It often features guitar solos, heavy bass lines, and drums. Rock music has had a significant impact on popular culture, influencing genres such as punk rock, heavy metal, and alternative rock."}]
[{'role': 'user', 'content': "Hello, what's your name?"}, {'role': 'assistant', 'content': " Hello! My name is LLaMA, I'm a large language model trained by a team of researcher at Meta AI. 😊"}, {'role': 'user', 'content': 'Btw, yesterday I was on a rock concert.'}, {'role': 'assistant', 'content': ' Oh, cool! That sounds like a lot of fun! 🎉 Did you enjoy the concert? What was the band like? 🤔'}]
```
@ -377,19 +375,17 @@ Sometimes you would want to first fill-in cache object with key/values for certa
>>> import copy
>>> import torch
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache, StaticCache
>>> from accelerate.test_utils.testing import get_backend
>>> DEVICE, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
>>> model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
>>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map=DEVICE)
>>> model_id = "meta-llama/Llama-2-7b-chat-hf"
>>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda")
>>> tokenizer = AutoTokenizer.from_pretrained(model_id)
>>> # Init StaticCache with big enough max-length (1024 tokens for the below example)
>>> # You can also init a DynamicCache, if that suits you better
>>> prompt_cache = StaticCache(config=model.config, max_batch_size=1, max_cache_len=1024, device=DEVICE, dtype=torch.bfloat16)
>>> prompt_cache = StaticCache(config=model.config, max_batch_size=1, max_cache_len=1024, device="cuda", dtype=torch.bfloat16)
>>> INITIAL_PROMPT = "You are a helpful assistant. "
>>> inputs_initial_prompt = tokenizer(INITIAL_PROMPT, return_tensors="pt").to(DEVICE)
>>> inputs_initial_prompt = tokenizer(INITIAL_PROMPT, return_tensors="pt").to("cuda")
>>> # This is the common prompt cached, we need to run forward without grad to be abel to copy
>>> with torch.no_grad():
... prompt_cache = model(**inputs_initial_prompt, past_key_values = prompt_cache).past_key_values
@ -397,14 +393,14 @@ Sometimes you would want to first fill-in cache object with key/values for certa
>>> prompts = ["Help me to write a blogpost about travelling.", "What is the capital of France?"]
>>> responses = []
>>> for prompt in prompts:
... new_inputs = tokenizer(INITIAL_PROMPT + prompt, return_tensors="pt").to(DEVICE)
... new_inputs = tokenizer(INITIAL_PROMPT + prompt, return_tensors="pt").to("cuda")
... past_key_values = copy.deepcopy(prompt_cache)
... outputs = model.generate(**new_inputs, past_key_values=past_key_values,max_new_tokens=20)
... response = tokenizer.batch_decode(outputs)[0]
... responses.append(response)
>>> print(responses)
['<s> You are a helpful assistant. Help me to write a blogpost about travelling. I am excited to share my experiences with you. I have been traveling for the past', '<s> You are a helpful assistant. What is the capital of France? \n\nAnswer: Paris is the capital of France.</s>']
['<s> You are a helpful assistant. Help me to write a blogpost about travelling.\n\nTitle: The Ultimate Guide to Travelling: Tips, Tricks, and', '<s> You are a helpful assistant. What is the capital of France?\n\nYes, the capital of France is Paris.</s>']
```
@ -418,8 +414,8 @@ this legacy format, you can seamlessly convert it to a `DynamicCache` and back.
>>> import torch
>>> from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache
>>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
>>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
>>> inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
>>> # `return_dict_in_generate=True` is required to return the cache. `return_legacy_cache` forces the returned cache

View File

@ -156,11 +156,9 @@ def decode_one_tokens(model, cur_token, input_pos, cache_position, past_key_valu
There are a few important things you must do to enable static kv-cache and `torch.compile` with the `StaticCache` method:
1. Initialize the [`StaticCache`] instance before using the model for inference. There you can configure parameters like the maximum batch size and sequence length.
2. Call `torch.compile` on the model to compile the forward pass with the static kv-cache.
3. Use `SDPBackend.MATH` in the [torch.nn.attention.sdpa_kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html) context manager to enable the native PyTorch C++ implementation of scaled dot product attention to speed up inference even more.
3. Set `enable_math=True` in the [torch.backends.cuda.sdp_kernel](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html) context manager to enable the native PyTorch C++ implementation of scaled dot product attention to speed up inference even more.
```py
from torch.nn.attention import SDPBackend, sdpa_kernel
batch_size, seq_length = inputs["input_ids"].shape
with torch.no_grad():
past_key_values = StaticCache(
@ -181,7 +179,7 @@ with torch.no_grad():
decode_one_tokens = torch.compile(decode_one_tokens, mode="reduce-overhead", fullgraph=True)
cache_position = torch.tensor([seq_length + 1], device=torch_device)
for _ in range(1, NUM_TOKENS_TO_GENERATE):
with sdpa_kernel(SDPBackend.MATH):
with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):
next_token = decode_one_tokens(model, next_token.clone(), None, cache_position, past_key_values)
generated_ids[:, cache_position] = next_token.int()
cache_position += 1
@ -455,11 +453,10 @@ Scaled dot product attention (SDPA) is automatically enabled in PyTorch 2.0 and
> [!TIP]
> SDPA supports FlashAttention-2 as long as you have the latest PyTorch version installed.
Use the [torch.nn.attention.sdpa_kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html) context manager to explicitly enable or disable any of the four attention algorithms. For example, use `SDPBackend.FLASH_ATTENTION` to enable FlashAttention.
Use the [torch.backends.cuda.sdp_kernel](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html) context manager to explicitly enable or disable any of the three attention algorithms. For example, set `enable_flash=True` to enable FlashAttention.
```py
import torch
from torch.nn.attention import SDPBackend, sdpa_kernel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
@ -467,7 +464,7 @@ model = AutoModelForCausalLM.from_pretrained(
torch_dtype=torch.bfloat16,
)
with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False):
outputs = model.generate(**inputs)
```
@ -476,7 +473,7 @@ with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
Quantization reduces the size of the LLM weights by storing them in a lower precision. This translates to lower memory usage and makes loading LLMs for inference more accessible if you're constrained by your GPUs memory. If you aren't limited by your GPU, you don't necessarily need to quantize your model because it can incur a small latency cost (except for AWQ and fused AWQ modules) due to the extra step required to quantize and dequantize the weights.
> [!TIP]
> There are many quantization libraries (see the [Quantization](./quantization) guide for more details) available, such as Quanto, AQLM, VPTQ, AWQ, and AutoGPTQ. Feel free to try them out and see which one works best for your use case. We also recommend reading the [Overview of natively supported quantization schemes in 🤗 Transformers](https://hf.co/blog/overview-quantization-transformers) blog post which compares AutoGPTQ and bitsandbytes.
> There are many quantization libraries (see the [Quantization](./quantization) guide for more details) available, such as Quanto, AQLM, AWQ, and AutoGPTQ. Feel free to try them out and see which one works best for your use case. We also recommend reading the [Overview of natively supported quantization schemes in 🤗 Transformers](https://hf.co/blog/overview-quantization-transformers) blog post which compares AutoGPTQ and bitsandbytes.
Use the Model Memory Calculator below to estimate and compare how much memory is required to load a model. For example, try estimating how much memory it costs to load [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1).

View File

@ -23,12 +23,6 @@ LLMs, or Large Language Models, are the key component behind text generation. In
Autoregressive generation is the inference-time procedure of iteratively calling a model with its own generated outputs, given a few initial inputs. In 🤗 Transformers, this is handled by the [`~generation.GenerationMixin.generate`] method, which is available to all models with generative capabilities.
<Tip>
If you want to jump straight to chatting with a model, [try our chat CLI](quicktour#chat-with-text-generation-models).
</Tip>
This tutorial will show you how to:
* Generate text with an LLM
@ -40,7 +34,6 @@ Before you begin, make sure you have all the necessary libraries installed:
```bash
pip install transformers bitsandbytes>=0.39.0 -q
```
Bitsandbytes supports multiple backends in addition to CUDA-based GPUs. Refer to the multi-backend installation [guide](https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend) to learn more.
## Generate text
@ -102,11 +95,9 @@ Next, you need to preprocess your text input with a [tokenizer](tokenizer_summar
```py
>>> from transformers import AutoTokenizer
>>> from accelerate.test_utils.testing import get_backend
>>> DEVICE, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
>>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", padding_side="left")
>>> model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(DEVICE)
>>> model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to("cuda")
```
The `model_inputs` variable holds the tokenized text input, as well as the attention mask. While [`~generation.GenerationMixin.generate`] does its best effort to infer the attention mask when it is not passed, we recommend passing it whenever possible for optimal results.
@ -125,7 +116,7 @@ Finally, you don't need to do it one sequence at a time! You can batch your inpu
>>> tokenizer.pad_token = tokenizer.eos_token # Most LLMs don't have a pad token by default
>>> model_inputs = tokenizer(
... ["A list of colors: red, blue", "Portugal is"], return_tensors="pt", padding=True
... ).to(DEVICE)
... ).to("cuda")
>>> generated_ids = model.generate(**model_inputs)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
['A list of colors: red, blue, green, yellow, orange, purple, pink,',
@ -155,7 +146,7 @@ If not specified in the [`~generation.GenerationConfig`] file, `generate` return
```py
>>> model_inputs = tokenizer(["A sequence of numbers: 1, 2"], return_tensors="pt").to(DEVICE)
>>> model_inputs = tokenizer(["A sequence of numbers: 1, 2"], return_tensors="pt").to("cuda")
>>> # By default, the output will contain up to 20 tokens
>>> generated_ids = model.generate(**model_inputs)
@ -177,7 +168,7 @@ By default, and unless specified in the [`~generation.GenerationConfig`] file, `
>>> from transformers import set_seed
>>> set_seed(42)
>>> model_inputs = tokenizer(["I am a cat."], return_tensors="pt").to(DEVICE)
>>> model_inputs = tokenizer(["I am a cat."], return_tensors="pt").to("cuda")
>>> # LLM + greedy decoding = repetitive, boring output
>>> generated_ids = model.generate(**model_inputs)
@ -199,7 +190,7 @@ LLMs are [decoder-only](https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt
>>> # which is shorter, has padding on the right side. Generation fails to capture the logic.
>>> model_inputs = tokenizer(
... ["1, 2, 3", "A, B, C, D, E"], padding=True, return_tensors="pt"
... ).to(DEVICE)
... ).to("cuda")
>>> generated_ids = model.generate(**model_inputs)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
'1, 2, 33333333333'
@ -209,7 +200,7 @@ LLMs are [decoder-only](https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt
>>> tokenizer.pad_token = tokenizer.eos_token # Most LLMs don't have a pad token by default
>>> model_inputs = tokenizer(
... ["1, 2, 3", "A, B, C, D, E"], padding=True, return_tensors="pt"
... ).to(DEVICE)
... ).to("cuda")
>>> generated_ids = model.generate(**model_inputs)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
'1, 2, 3, 4, 5, 6,'
@ -226,7 +217,7 @@ Some models and tasks expect a certain input prompt format to work properly. Whe
... )
>>> set_seed(0)
>>> prompt = """How many helicopters can a human eat in one sitting? Reply as a thug."""
>>> model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
>>> model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
>>> input_length = model_inputs.input_ids.shape[1]
>>> generated_ids = model.generate(**model_inputs, max_new_tokens=20)
>>> print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
@ -242,7 +233,7 @@ Some models and tasks expect a certain input prompt format to work properly. Whe
... },
... {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
... ]
>>> model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
>>> model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
>>> input_length = model_inputs.shape[1]
>>> generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=20)
>>> print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
@ -274,9 +265,8 @@ While the autoregressive generation process is relatively straightforward, makin
### Related libraries
1. [`optimum`](https://github.com/huggingface/optimum), an extension of 🤗 Transformers that optimizes for specific hardware devices;
1. [`optimum`](https://github.com/huggingface/optimum), an extension of 🤗 Transformers that optimizes for specific hardware devices.
2. [`outlines`](https://github.com/outlines-dev/outlines), a library where you can constrain text generation (e.g. to generate JSON files);
3. [`SynCode`](https://github.com/uiuc-focal-lab/syncode), a library for context-free grammar guided generation (e.g. JSON, SQL, Python);
3. [`SynCode`](https://github.com/uiuc-focal-lab/syncode), a library for context-free grammar guided generation. (e.g. JSON, SQL, Python)
4. [`text-generation-inference`](https://github.com/huggingface/text-generation-inference), a production-ready server for LLMs;
5. [`text-generation-webui`](https://github.com/oobabooga/text-generation-webui), a UI for text generation;
6. [`logits-processor-zoo`](https://github.com/NVIDIA/logits-processor-zoo), containing additional options to control text generation with 🤗 Transformers. See our related [blog post](https://huggingface.co/blog/logits-processor-zoo).

View File

@ -71,6 +71,3 @@ Examples of use can be found in the [example scripts](../examples) or [example n
[[autodoc]] data.data_collator.DataCollatorWithFlattening
# DataCollatorForMultipleChoice
[[autodoc]] data.data_collator.DataCollatorForMultipleChoice

View File

@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.
# DeepSpeed
[DeepSpeed](https://github.com/deepspeedai/DeepSpeed), powered by Zero Redundancy Optimizer (ZeRO), is an optimization library for training and fitting very large models onto a GPU. It is available in several ZeRO stages, where each stage progressively saves more GPU memory by partitioning the optimizer state, gradients, parameters, and enabling offloading to a CPU or NVMe. DeepSpeed is integrated with the [`Trainer`] class and most of the setup is automatically taken care of for you.
[DeepSpeed](https://github.com/microsoft/DeepSpeed), powered by Zero Redundancy Optimizer (ZeRO), is an optimization library for training and fitting very large models onto a GPU. It is available in several ZeRO stages, where each stage progressively saves more GPU memory by partitioning the optimizer state, gradients, parameters, and enabling offloading to a CPU or NVMe. DeepSpeed is integrated with the [`Trainer`] class and most of the setup is automatically taken care of for you.
However, if you want to use DeepSpeed without the [`Trainer`], Transformers provides a [`HfDeepSpeedConfig`] class.

View File

@ -27,7 +27,6 @@ from transformers import AutoImageProcessor
processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50", use_fast=True)
```
Note that `use_fast` will be set to `True` by default in a future release.
When using a fast image processor, you can also set the `device` argument to specify the device on which the processing should be done. By default, the processing is done on the same device as the inputs if the inputs are tensors, or on the CPU otherwise.
@ -43,17 +42,21 @@ images_processed = processor(images, return_tensors="pt", device="cuda")
Here are some speed comparisons between the base and fast image processors for the `DETR` and `RT-DETR` models, and how they impact overall inference time:
<div class="flex">
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png" />
</div>
<div class="flex">
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png" />
<div>
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png" />
</div>
<div>
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png" />
</div>
</div>
<div class="flex">
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png" />
</div>
<div class="flex">
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png" />
<div>
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png" />
</div>
<div>
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png" />
</div>
</div>
These benchmarks were run on an [AWS EC2 g5.2xlarge instance](https://aws.amazon.com/ec2/instance-types/g5/), utilizing an NVIDIA A10G Tensor Core GPU.

View File

@ -34,10 +34,6 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
[[autodoc]] AqlmConfig
## VptqConfig
[[autodoc]] VptqConfig
## AwqConfig
[[autodoc]] AwqConfig
@ -57,10 +53,6 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
[[autodoc]] quantizers.base.HfQuantizer
## HiggsConfig
[[autodoc]] HiggsConfig
## HqqConfig
[[autodoc]] HqqConfig
@ -80,11 +72,3 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
## BitNetConfig
[[autodoc]] BitNetConfig
## SpQRConfig
[[autodoc]] SpQRConfig
## FineGrainedFP8Config
[[autodoc]] FineGrainedFP8Config

View File

@ -1,106 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Aria
## Overview
The Aria model was proposed in [Aria: An Open Multimodal Native Mixture-of-Experts Model](https://huggingface.co/papers/2410.05993) by Li et al. from the Rhymes.AI team.
Aria is an open multimodal-native model with best-in-class performance across a wide range of multimodal, language, and coding tasks. It has a Mixture-of-Experts architecture, with respectively 3.9B and 3.5B activated parameters per visual token and text token.
The abstract from the paper is the following:
*Information comes in diverse modalities. Multimodal native AI models are essential to integrate real-world information and deliver comprehensive understanding. While proprietary multimodal native models exist, their lack of openness imposes obstacles for adoptions, let alone adaptations. To fill this gap, we introduce Aria, an open multimodal native model with best-in-class performance across a wide range of multimodal, language, and coding tasks. Aria is a mixture-of-expert model with 3.9B and 3.5B activated parameters per visual token and text token, respectively. It outperforms Pixtral-12B and Llama3.2-11B, and is competitive against the best proprietary models on various multimodal tasks. We pre-train Aria from scratch following a 4-stage pipeline, which progressively equips the model with strong capabilities in language understanding, multimodal understanding, long context window, and instruction following. We open-source the model weights along with a codebase that facilitates easy adoptions and adaptations of Aria in real-world applications.*
This model was contributed by [m-ric](https://huggingface.co/m-ric).
The original code can be found [here](https://github.com/rhymes-ai/Aria).
## Usage tips
Here's how to use the model for vision tasks:
```python
import requests
import torch
from PIL import Image
from transformers import AriaProcessor, AriaForConditionalGeneration
model_id_or_path = "rhymes-ai/Aria"
model = AriaForConditionalGeneration.from_pretrained(
model_id_or_path, device_map="auto"
)
processor = AriaProcessor.from_pretrained(model_id_or_path)
image = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
messages = [
{
"role": "user",
"content": [
{"type": "image"},
{"text": "what is the image?", "type": "text"},
],
}
]
text = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=text, images=image, return_tensors="pt")
inputs.to(model.device)
output = model.generate(
**inputs,
max_new_tokens=15,
stop_strings=["<|im_end|>"],
tokenizer=processor.tokenizer,
do_sample=True,
temperature=0.9,
)
output_ids = output[0][inputs["input_ids"].shape[1]:]
response = processor.decode(output_ids, skip_special_tokens=True)
```
## AriaImageProcessor
[[autodoc]] AriaImageProcessor
## AriaProcessor
[[autodoc]] AriaProcessor
## AriaTextConfig
[[autodoc]] AriaTextConfig
## AriaConfig
[[autodoc]] AriaConfig
## AriaTextModel
[[autodoc]] AriaTextModel
## AriaTextForCausalLM
[[autodoc]] AriaTextForCausalLM
## AriaForConditionalGeneration
[[autodoc]] AriaForConditionalGeneration
- forward

View File

@ -1,64 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# Bamba
## Overview
Bamba-9B is a decoder-only language model based on the [Mamba-2](https://github.com/state-spaces/mamba) architecture and is designed to handle a wide range of text generation tasks. It is trained from scratch using a two-stage training approach. In the first stage, the model is trained on 2 trillion tokens from the Dolma v1.7 dataset. In the second stage, it undergoes additional training on 200 billion tokens, leveraging a carefully curated blend of high-quality data to further refine its performance and enhance output quality.
Checkout all Bamba-9B model checkpoints [here](https://github.com/foundation-model-stack/bamba).
## BambaConfig
| Model | Params | # Layers | Hidden Dim. | Attention Heads | GQA | KV Heads | Context Length | Tied Embeddings |
|-------------------|--------------|----------|-------------|-----------------|-----|----------|----------------|------------------|
| Bamba | 9B (9.78B) | 32 | 4096 | 32 | Yes | 8 | 4096 | True |
[[autodoc]] BambaConfig
<!---
## Usage Tips
Tips:
- The architecture is based on Mamba-2 models.
## BambaModel
[[autodoc]] BambaModel
- forward
-->
## BambaForCausalLM
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("ibm-fms/Bamba-9B")
tokenizer = AutoTokenizer.from_pretrained("ibm-fms/Bamba-9B")
message = ["Mamba is a snake with following properties "]
inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
response = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
```
[[autodoc]] BambaForCausalLM
- forward
This HF implementation is contributed by [ani300](https://github.com/ani300) and [fabianlim](https://github.com/fabianlim).

View File

@ -71,43 +71,6 @@ alt="drawing" width="600"/>
<small> BEiT pre-training. Taken from the <a href="https://arxiv.org/abs/2106.08254">original paper.</a> </small>
### Using Scaled Dot Product Attention (SDPA)
PyTorch includes a native scaled dot-product attention (SDPA) operator as part of `torch.nn.functional`. This function
encompasses several implementations that can be applied depending on the inputs and the hardware in use. See the
[official documentation](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html)
or the [GPU Inference](https://huggingface.co/docs/transformers/main/en/perf_infer_gpu_one#pytorch-scaled-dot-product-attention)
page for more information.
SDPA is used by default for `torch>=2.1.1` when an implementation is available, but you may also set
`attn_implementation="sdpa"` in `from_pretrained()` to explicitly request SDPA to be used.
```
from transformers import BeitForImageClassification
model = BeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224", attn_implementation="sdpa", torch_dtype=torch.float16)
...
```
For the best speedups, we recommend loading the model in half-precision (e.g. `torch.float16` or `torch.bfloat16`).
On a local benchmark (NVIDIA GeForce RTX 2060-8GB, PyTorch 2.5.1, OS Ubuntu 20.04) with `float16` and
`microsoft/beit-base-patch16-224` model, we saw the following improvements during training and inference:
#### Training
| num_training_steps | batch_size | image_size | is_cuda | Time per batch (eager - s) | Time per batch (sdpa - s) | Speedup (%) | Eager peak mem (MB) | SDPA peak mem (MB) | Mem saving (%) |
|--------------------|------------|--------------|---------|----------------------------|---------------------------|-------------|----------------------|--------------------|----------------|
| 50 | 2 | (1048, 640) | True | 0.984 | 0.746 | 31.975 | 6738.915 | 4319.886 | 55.998 |
#### Inference
| Image batch size | Eager (s/iter) | Eager CI, % | Eager memory (MB) | SDPA (s/iter) | SDPA CI, % | SDPA memory (MB) | SDPA speedup | SDPA memory saved (%) |
|-------------------:|-----------------:|:--------------|--------------------:|----------------:|:-------------|-------------------:|---------------:|----------------------:|
| 1 | 0.012 | ±0.3% | 3.76657e+08 | 0.011 | ±0.5% | 3.75739e+08 | 1.05 | 0.244 |
| 4 | 0.013 | ±0.1% | 4.03147e+08 | 0.011 | ±0.2% | 3.90554e+08 | 1.178 | 3.225 |
| 16 | 0.045 | ±0.1% | 4.96697e+08 | 0.035 | ±0.1% | 4.51232e+08 | 1.304 | 10.076 |
| 32 | 0.088 | ±0.1% | 6.24417e+08 | 0.066 | ±0.1% | 5.33488e+08 | 1.325 | 17.044 |
## Resources
A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with BEiT.

View File

@ -61,11 +61,6 @@ The original code can be found [here](https://github.com/salesforce/BLIP).
[[autodoc]] BlipImageProcessor
- preprocess
## BlipImageProcessorFast
[[autodoc]] BlipImageProcessorFast
- preprocess
<frameworkcontent>
<pt>

View File

@ -251,11 +251,6 @@ The resource should ideally demonstrate something new instead of duplicating an
[[autodoc]] CLIPImageProcessor
- preprocess
## CLIPImageProcessorFast
[[autodoc]] CLIPImageProcessorFast
- preprocess
## CLIPFeatureExtractor
[[autodoc]] CLIPFeatureExtractor

View File

@ -1,51 +0,0 @@
# Cohere
## Overview
[C4AI Command R7B](https://cohere.com/blog/command-r7b) is an open weights research release of a 7B billion parameter model developed by Cohere and Cohere For AI. It has advanced capabilities optimized for various use cases, including reasoning, summarization, question answering, and code. The model is trained to perform sophisticated tasks including Retrieval Augmented Generation (RAG) and tool use. The model also has powerful agentic capabilities that can use and combine multiple tools over multiple steps to accomplish more difficult tasks. It obtains top performance on enterprise-relevant code use cases. C4AI Command R7B is a multilingual model trained on 23 languages.
The model features three layers with sliding window attention (window size 4096) and ROPE for efficient local context modeling and relative positional encoding. A fourth layer uses global attention without positional embeddings, enabling unrestricted token interactions across the entire sequence.
The model has been trained on 23 languages: English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Arabic, Chinese, Russian, Polish, Turkish, Vietnamese, Dutch, Czech, Indonesian, Ukrainian, Romanian, Greek, Hindi, Hebrew, and Persian.
## Usage tips
The model and tokenizer can be loaded via:
```python
# pip install transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
model_id = "CohereForAI/c4ai-command-r7b-12-2024"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Format message with the command-r chat template
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
gen_tokens = model.generate(
input_ids,
max_new_tokens=100,
do_sample=True,
temperature=0.3,
)
gen_text = tokenizer.decode(gen_tokens[0])
print(gen_text)
```
## Cohere2Config
[[autodoc]] Cohere2Config
## Cohere2Model
[[autodoc]] Cohere2Model
- forward
## Cohere2ForCausalLM
[[autodoc]] Cohere2ForCausalLM
- forward

View File

@ -1,90 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# ColPali
## Overview
The *ColPali* model was proposed in [ColPali: Efficient Document Retrieval with Vision Language Models](https://doi.org/10.48550/arXiv.2407.01449) by **Manuel Faysse***, **Hugues Sibille***, **Tony Wu***, Bilel Omrani, Gautier Viaud, Céline Hudelot, Pierre Colombo (* denotes equal contribution). Work lead by ILLUIN Technology.
In our proposed *ColPali* approach, we leverage VLMs to construct efficient multi-vector embeddings directly from document images (“screenshots”) for document retrieval. We train the model to maximize the similarity between these document embeddings and the corresponding query embeddings, using the late interaction method introduced in ColBERT.
Using *ColPali* removes the need for potentially complex and brittle layout recognition and OCR pipelines with a single model that can take into account both the textual and visual content (layout, charts, etc.) of a document.
## Resources
- The *ColPali* arXiv paper can be found [here](https://doi.org/10.48550/arXiv.2407.01449). 📄
- The official blog post detailing ColPali can be found [here](https://huggingface.co/blog/manu/colpali). 📝
- The original model implementation code for the ColPali model and for the `colpali-engine` package can be found [here](https://github.com/illuin-tech/colpali). 🌎
- Cookbooks for learning to use the transformers-native version of *ColPali*, fine-tuning, and similarity maps generation can be found [here](https://github.com/tonywu71/colpali-cookbooks). 📚
This model was contributed by [@tonywu71](https://huggingface.co/tonywu71) and [@yonigozlan](https://huggingface.co/yonigozlan).
## Usage
This example demonstrates how to use *ColPali* to embed both queries and images, calculate their similarity scores, and identify the most relevant matches. For a specific query, you can retrieve the top-k most similar images by selecting the ones with the highest similarity scores.
```python
import torch
from PIL import Image
from transformers import ColPaliForRetrieval, ColPaliProcessor
model_name = "vidore/colpali-v1.2-hf"
model = ColPaliForRetrieval.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map="cuda:0", # or "mps" if on Apple Silicon
).eval()
processor = ColPaliProcessor.from_pretrained(model_name)
# Your inputs (replace dummy images with screenshots of your documents)
images = [
Image.new("RGB", (32, 32), color="white"),
Image.new("RGB", (16, 16), color="black"),
]
queries = [
"What is the organizational structure for our R&D department?",
"Can you provide a breakdown of last years financial performance?",
]
# Process the inputs
batch_images = processor(images=images).to(model.device)
batch_queries = processor(text=queries).to(model.device)
# Forward pass
with torch.no_grad():
image_embeddings = model(**batch_images).embeddings
query_embeddings = model(**batch_queries).embeddings
# Score the queries against the images
scores = processor.score_retrieval(query_embeddings, image_embeddings)
```
## ColPaliConfig
[[autodoc]] ColPaliConfig
## ColPaliProcessor
[[autodoc]] ColPaliProcessor
## ColPaliForRetrieval
[[autodoc]] ColPaliForRetrieval
- forward

View File

@ -64,11 +64,6 @@ If you're interested in submitting a resource to be included here, please feel f
[[autodoc]] ConvNextImageProcessor
- preprocess
## ConvNextImageProcessorFast
[[autodoc]] ConvNextImageProcessorFast
- preprocess
<frameworkcontent>
<pt>

View File

@ -1,119 +0,0 @@
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.
-->
# DAB-DETR
## Overview
The DAB-DETR model was proposed in [DAB-DETR: Dynamic Anchor Boxes are Better Queries for DETR](https://arxiv.org/abs/2201.12329) by Shilong Liu, Feng Li, Hao Zhang, Xiao Yang, Xianbiao Qi, Hang Su, Jun Zhu, Lei Zhang.
DAB-DETR is an enhanced variant of Conditional DETR. It utilizes dynamically updated anchor boxes to provide both a reference query point (x, y) and a reference anchor size (w, h), improving cross-attention computation. This new approach achieves 45.7% AP when trained for 50 epochs with a single ResNet-50 model as the backbone.
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/dab_detr_convergence_plot.png"
alt="drawing" width="600"/>
The abstract from the paper is the following:
*We present in this paper a novel query formulation using dynamic anchor boxes
for DETR (DEtection TRansformer) and offer a deeper understanding of the role
of queries in DETR. This new formulation directly uses box coordinates as queries
in Transformer decoders and dynamically updates them layer-by-layer. Using box
coordinates not only helps using explicit positional priors to improve the query-to-feature similarity and eliminate the slow training convergence issue in DETR,
but also allows us to modulate the positional attention map using the box width
and height information. Such a design makes it clear that queries in DETR can be
implemented as performing soft ROI pooling layer-by-layer in a cascade manner.
As a result, it leads to the best performance on MS-COCO benchmark among
the DETR-like detection models under the same setting, e.g., AP 45.7% using
ResNet50-DC5 as backbone trained in 50 epochs. We also conducted extensive
experiments to confirm our analysis and verify the effectiveness of our methods.*
This model was contributed by [davidhajdu](https://huggingface.co/davidhajdu).
The original code can be found [here](https://github.com/IDEA-Research/DAB-DETR).
## How to Get Started with the Model
Use the code below to get started with the model.
```python
import torch
import requests
from PIL import Image
from transformers import AutoModelForObjectDetection, AutoImageProcessor
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)
image_processor = AutoImageProcessor.from_pretrained("IDEA-Research/dab-detr-resnet-50")
model = AutoModelForObjectDetection.from_pretrained("IDEA-Research/dab-detr-resnet-50")
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3)
for result in results:
for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):
score, label = score.item(), label_id.item()
box = [round(i, 2) for i in box.tolist()]
print(f"{model.config.id2label[label]}: {score:.2f} {box}")
```
This should output
```
cat: 0.87 [14.7, 49.39, 320.52, 469.28]
remote: 0.86 [41.08, 72.37, 173.39, 117.2]
cat: 0.86 [344.45, 19.43, 639.85, 367.86]
remote: 0.61 [334.27, 75.93, 367.92, 188.81]
couch: 0.59 [-0.04, 1.34, 639.9, 477.09]
```
There are three other ways to instantiate a DAB-DETR model (depending on what you prefer):
Option 1: Instantiate DAB-DETR with pre-trained weights for entire model
```py
>>> from transformers import DabDetrForObjectDetection
>>> model = DabDetrForObjectDetection.from_pretrained("IDEA-Research/dab-detr-resnet-50")
```
Option 2: Instantiate DAB-DETR with randomly initialized weights for Transformer, but pre-trained weights for backbone
```py
>>> from transformers import DabDetrConfig, DabDetrForObjectDetection
>>> config = DabDetrConfig()
>>> model = DabDetrForObjectDetection(config)
```
Option 3: Instantiate DAB-DETR with randomly initialized weights for backbone + Transformer
```py
>>> config = DabDetrConfig(use_pretrained_backbone=False)
>>> model = DabDetrForObjectDetection(config)
```
## DabDetrConfig
[[autodoc]] DabDetrConfig
## DabDetrModel
[[autodoc]] DabDetrModel
- forward
## DabDetrForObjectDetection
[[autodoc]] DabDetrForObjectDetection
- forward

Some files were not shown because too many files have changed in this diff Show More