[run-slow] pixtral

Correct nesting in test
2025-10-21 09:44:02 +08:00 · 2024-12-05 18:45:21 +00:00 · 2024-12-05 18:41:21 +00:00 · 2024-12-05 18:40:35 +00:00 · 2024-12-05 18:36:06 +00:00 · 2024-12-05 18:32:42 +00:00
2292 changed files with 52267 additions and 127076 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -13,7 +13,6 @@ jobs:
    check_circleci_user:
        docker:
            - image: python:3.10-slim
-        resource_class: small
        parallelism: 1
        steps:
            - run: echo $CIRCLE_PROJECT_USERNAME
@ -58,15 +57,15 @@ jobs:
            - run:
                name: "Prepare pipeline parameters"
                command: |
-                    python utils/process_test_artifacts.py
-
+                    python utils/process_test_artifacts.py 
+            
            # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
            # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
            # We used:

            # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
            # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-
+                
            - store_artifacts:
                path: test_preparation/transformed_artifacts.json
            - store_artifacts:
@ -110,7 +109,7 @@ jobs:
            - run:
                name: "Prepare pipeline parameters"
                command: |
-                    python utils/process_test_artifacts.py
+                    python utils/process_test_artifacts.py 

            # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
            # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
@ -170,7 +169,7 @@ jobs:
            - store_artifacts:
                  path: ~/transformers/installed.txt
            - run: python utils/check_copies.py
-            - run: python utils/check_modular_conversion.py --num_workers 4
+            - run: python utils/check_modular_conversion.py
            - run: python utils/check_table.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@ -32,7 +32,7 @@ COMMON_ENV_VARIABLES = {
    "RUN_PT_FLAX_CROSS_TESTS": False,
 }
 # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
-COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
+COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]


@ -40,23 +40,9 @@ class EmptyJob:
    job_name = "empty"

    def to_dict(self):
-        steps = [{"run": 'ls -la'}]
-        if self.job_name == "collection_job":
-            steps.extend(
-                [
-                    "checkout",
-                    {"run": "pip install requests || true"},
-                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
-                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
-                    {"store_artifacts": {"path": "outputs"}},
-                    {"run": 'echo "All required jobs have now completed"'},
-                ]
-            )
-
        return {
            "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
-            "resource_class": "small",
-            "steps": steps,
+            "steps":["checkout"],
        }


@ -68,9 +54,9 @@ class CircleCIJob:
    install_steps: List[str] = None
    marker: Optional[str] = None
    parallelism: Optional[int] = 0
-    pytest_num_workers: int = 8
+    pytest_num_workers: int = 12
    pytest_options: Dict[str, Any] = None
-    resource_class: Optional[str] = "xlarge"
+    resource_class: Optional[str] = "2xlarge"
    tests_to_run: Optional[List[str]] = None
    num_test_files_per_worker: Optional[int] = 10
    # This should be only used for doctest job!
@ -147,7 +133,7 @@ class CircleCIJob:
                "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
            },
            {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
-            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
+            {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
                        {"run": {"name": "Split tests across parallel nodes: show current parallel tests",
                    "command": f"TESTS=$(circleci tests split  --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
                    }
@ -199,6 +185,7 @@ torch_job = CircleCIJob(
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    marker="not generate",
    parallelism=6,
+    pytest_num_workers=8
 )

 generate_job = CircleCIJob(
@ -206,24 +193,28 @@ generate_job = CircleCIJob(
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    marker="generate",
    parallelism=6,
+    pytest_num_workers=8
 )

 tokenization_job = CircleCIJob(
    "tokenization",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    parallelism=8,
+    pytest_num_workers=16
 )

 processor_job = CircleCIJob(
    "processors",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    parallelism=8,
+    pytest_num_workers=6
 )

 tf_job = CircleCIJob(
    "tf",
    docker_image=[{"image":"huggingface/transformers-tf-light"}],
    parallelism=6,
+    pytest_num_workers=16,
 )


@ -231,8 +222,7 @@ flax_job = CircleCIJob(
    "flax",
    docker_image=[{"image":"huggingface/transformers-jax-light"}],
    parallelism=6,
-    pytest_num_workers=16,
-    resource_class="2xlarge",
+    pytest_num_workers=16
 )


@ -241,7 +231,7 @@ pipelines_torch_job = CircleCIJob(
    additional_env={"RUN_PIPELINE_TESTS": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    marker="is_pipeline_test",
-    parallelism=4,
+    parallelism=4
 )


@ -250,7 +240,7 @@ pipelines_tf_job = CircleCIJob(
    additional_env={"RUN_PIPELINE_TESTS": True},
    docker_image=[{"image":"huggingface/transformers-tf-light"}],
    marker="is_pipeline_test",
-    parallelism=4,
+    parallelism=4
 )


@ -267,6 +257,7 @@ examples_torch_job = CircleCIJob(
    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
    # TODO @ArthurZucker remove this once docker is easier to build
    install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
+    pytest_num_workers=8,
 )


@ -274,6 +265,7 @@ examples_tensorflow_job = CircleCIJob(
    "examples_tensorflow",
    additional_env={"OMP_NUM_THREADS": 8},
    docker_image=[{"image":"huggingface/transformers-examples-tf"}],
+    pytest_num_workers=16,
 )


@ -288,7 +280,6 @@ hub_job = CircleCIJob(
    ],
    marker="is_staging_test",
    pytest_num_workers=2,
-    resource_class="medium",
 )


@ -301,13 +292,13 @@ onnx_job = CircleCIJob(
    ],
    pytest_options={"k onnx": None},
    pytest_num_workers=1,
-    resource_class="small",
 )


 exotic_models_job = CircleCIJob(
    "exotic_models",
    docker_image=[{"image":"huggingface/transformers-exotic-models"}],
+    pytest_num_workers=12,
    parallelism=4,
    pytest_options={"durations": 100},
 )
@ -326,6 +317,7 @@ non_model_job = CircleCIJob(
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    marker="not generate",
    parallelism=6,
+    pytest_num_workers=8,
 )


@ -360,7 +352,6 @@ REPO_UTIL_TESTS = [repo_utils_job]
 DOC_TESTS = [doc_test_job]
 ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip

-
 def create_circleci_config(folder=None):
    if folder is None:
        folder = os.getcwd()
@ -370,13 +361,7 @@ def create_circleci_config(folder=None):

    if len(jobs) == 0:
        jobs = [EmptyJob()]
-    else:
-        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
-        # Add a job waiting all the test jobs and aggregate their test summary files at the end
-        collection_job = EmptyJob()
-        collection_job.job_name = "collection_job"
-        jobs = [collection_job] + jobs
-
+    print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
    config = {
        "version": "2.1",
        "parameters": {
@ -386,14 +371,9 @@ def create_circleci_config(folder=None):
            **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
            **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
        },
-        "jobs": {j.job_name: j.to_dict() for j in jobs}
+        "jobs" : {j.job_name: j.to_dict() for j in jobs},
+        "workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
    }
-    if "CIRCLE_TOKEN" in os.environ:
-        # For private forked repo. (e.g. new model addition)
-        config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}}
-    else:
-        # For public repo. (e.g. `transformers`)
-        config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
    with open(os.path.join(folder, "generated_config.yml"), "w") as f:
        f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))

--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -106,7 +106,6 @@ body:
      label: Reproduction
      description: |
        Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
-        Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs.
        If you have code snippets, error messages, stack traces please provide them here as well.
        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
        Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -18,8 +18,7 @@ jobs:
    name: Benchmark
    strategy:
      matrix:
-        # group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
-        group: [aws-g5-4xlarge-cache]
+        group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
    runs-on:
      group: ${{ matrix.group }}
    if: |
@ -64,7 +63,7 @@ jobs:
            commit_id=$GITHUB_SHA
          fi
          commit_msg=$(git show -s --format=%s | cut -c1-70)
-          python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
+          python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
        env:
          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
          # Enable this to see debug logs
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@ -3,7 +3,7 @@ name: Build docker images (scheduled)
 on:
  push:
    branches:
-      - check_doc_image
+      - build_ci_docker_image*
  repository_dispatch:
  workflow_call:
    inputs:
@ -18,6 +18,132 @@ concurrency:
  cancel-in-progress: false

 jobs:
+  latest-docker:
+    name: "Latest PyTorch + TensorFlow [dev]"
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v4
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-all-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-all-latest-gpu-push-ci
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-torch-deepspeed-docker:
+    name: "Latest PyTorch + DeepSpeed"
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v4
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }}
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
+          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
+  latest-torch-deepspeed-docker-for-push-ci-daily-build:
+    name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v4
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
  doc-builder:
    name: "Doc builder"
    # Push CI doesn't need this image
@ -50,6 +176,218 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-doc-builder docker build
+          title: 🤗 Results of the huggingface/transformers-doc-builder docker build 
          status: ${{ job.status }}
-          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-pytorch:
+    name: "Latest PyTorch [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v4
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-gpu
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-pytorch-amd:
+    name: "Latest PyTorch (AMD) [dev]"
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      - 
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - 
+        name: Check out code
+        uses: actions/checkout@v4
+      - 
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - 
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-amd-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-amd-gpu-push-ci
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-tensorflow:
+    name: "Latest TensorFlow [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v4
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-tensorflow-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-tensorflow-gpu
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-pytorch-deepspeed-amd:
+    name: "PyTorch + DeepSpeed (AMD) [dev]"
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      - 
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - 
+        name: Check out code
+        uses: actions/checkout@v4
+      - 
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - 
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu${{ inputs.image_postfix }}
+      # Push CI images still need to be re-built daily
+      -
+        name: Build and push (for Push CI) in a daily basis
+        # This condition allows `schedule` events, or `push` events that trigger this workflow NOT via `workflow_call`.
+        # The later case is useful for manual image building for debugging purpose. Use another tag in this case!
+        if: inputs.image_postfix != '-push-ci'
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-pytorch-deepspeed-amd-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-pytorch-deepspeed-amd-gpu-push-ci
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-quantization-torch-docker:
+    name: "Latest Pytorch + Quantization [dev]"
+     # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        name: Check out code
+        uses: actions/checkout@v4
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-quantization-latest-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-quantization-latest-gpu${{ inputs.image_postfix }}
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the transformers-quantization-latest-gpu build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/push-important-models.yml
+++ b/.github/workflows/push-important-models.yml
@ -134,3 +134,10 @@ jobs:
          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
          waitForSSH: true
+
+  benchmark:
+    name: Benchmark workflow
+    needs: get_modified_models
+    if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
+    uses: ./.github/workflows/benchmark.yml
+    secrets: inherit
--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@ -1,417 +0,0 @@
-name: PR comment GitHub CI
-
-on:
-  issue_comment:
-    types:
-      - created
-    branches-ignore:
-      - main
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }}
-  cancel-in-progress: true
-permissions: read-all
-
-env:
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes
-  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
-  # This token is created under the bot `hf-transformers-bot`.
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
-  RUN_PT_TF_CROSS_TESTS: 1
-  CUDA_VISIBLE_DEVICES: 0,1
-
-jobs:
-  get-pr-number:
-    runs-on: ubuntu-22.04
-    name: Get PR number
-    # For security: only allow team members to run
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
-    outputs:
-      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
-    steps:
-      - name: Get PR number
-        shell: bash
-        run: |
-          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
-            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
-          else
-            echo "PR_NUMBER=" >> $GITHUB_ENV
-          fi
-
-      - name: Check PR number
-        shell: bash
-        run: |
-          echo "${{ env.PR_NUMBER }}"
-
-      - name: Set PR number
-        id: set_pr_number
-        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
-
-  get-sha:
-    runs-on: ubuntu-22.04
-    needs: get-pr-number
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    outputs:
-      PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }}
-      PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: "0"
-          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
-
-      - name: Get SHA (and verify timestamps against the issue comment date)
-        id: get_sha
-        env:
-          PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
-          COMMENT_DATE: ${{ github.event.comment.created_at }}
-        run: |
-            git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head
-            git checkout refs/remotes/pull/$PR_NUMBER/head
-            echo "PR_HEAD_SHA: $(git log -1 --format=%H)"
-            echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
-            git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge
-            git checkout refs/remotes/pull/$PR_NUMBER/merge
-            echo "PR_MERGE_SHA: $(git log -1 --format=%H)"
-            echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT"
-            PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd)
-            echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
-            COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
-            echo "COMMENT_DATE: $COMMENT_DATE"
-            echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
-            if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
-              echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
-              exit -1;
-            fi
-
-  # use a python script to handle this complex logic
-  # case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model)
-  # case 2: `run-slow model_1, model_2`
-  get-tests:
-    runs-on: ubuntu-22.04
-    needs: [get-pr-number, get-sha]
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    outputs:
-      models: ${{ steps.models_to_run.outputs.models }}
-      quantizations: ${{ steps.models_to_run.outputs.quantizations }}
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: "0"
-          ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge"
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        run: |
-            PR_MERGE_SHA=$(git log -1 --format=%H)
-            if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-              echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-              exit -1;
-            fi
-
-      - name: Get models to test
-        env:
-          PR_COMMENT: ${{ github.event.comment.body }}
-        run: |
-          python -m pip install GitPython
-          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
-          echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
-          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
-          echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV
-
-      - name: Show models to test
-        id: models_to_run
-        run: |
-          echo "${{ env.models }}"
-          echo "models=${{ env.models }}" >> $GITHUB_ENV
-          echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
-          echo "${{ env.quantizations }}"
-          echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT
-
-  reply_to_comment:
-    name: Reply to the comment
-    if: ${{ needs.get-tests.outputs.models != '[]'  || needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-pr-number, get-tests]
-    permissions:
-      pull-requests: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Reply to the comment
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          MODELS: ${{ needs.get-tests.outputs.models }}
-          BODY: "This comment contains run-slow, running the specified jobs:\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
-        run: |
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
-
-  create_run:
-    name: Create run
-    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-sha, get-tests, reply_to_comment]
-    permissions:
-      statuses: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Create Run
-        id: create_run
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
-          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
-          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-        run: |
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"
-
-  run_models_gpu:
-    name: Run all tests for the model
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
-        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-       group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ matrix.folders }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
-        # set the artifact folder names (because the character `/` is not allowed).
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Checkout to PR merge commit
-        working-directory: /transformers
-        run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        working-directory: /transformers
-        run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        run: |
-          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
-          echo $CUDA_VISIBLE_DEVICES
-          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Make sure report directory exists
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
-
-  run_quantization_torch_gpu:
-    name: Run all tests for a quantization
-    if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
-        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-quantization-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Checkout to PR merge commit
-        working-directory: /transformers
-        run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        working-directory: /transformers
-        run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run quantization tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Make sure report directory exists
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
-
-  update_run_status:
-    name: Update Check Run Status
-    needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
-    permissions:
-      statuses: write
-    if: ${{ always() && needs.create_run.result == 'success' }}
-    runs-on: ubuntu-22.04
-    env:
-      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
-    steps:
-      - name: Get `run_models_gpu` job status
-        run: |
-          echo "${{ needs.run_models_gpu.result }}"
-          echo "${{ needs.run_quantization_torch_gpu.result }}"
-          echo $STATUS_OK
-          if [ "$STATUS_OK" = "true" ]; then
-            echo "STATUS=success" >> $GITHUB_ENV
-          else
-            echo "STATUS=failure" >> $GITHUB_ENV
-          fi
-
-      - name: Update PR commit statuses
-        run: |
-          echo "${{ needs.run_models_gpu.result }}"
-          echo "${{ env.STATUS }}"
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \
-            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests"
--- a/.github/workflows/self-nightly-past-ci-caller.yml
+++ b/.github/workflows/self-nightly-past-ci-caller.yml
@ -21,6 +21,39 @@ jobs:
          echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')"
          echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT

+  run_past_ci_pytorch_1-13:
+    name: PyTorch 1.13
+    needs: get_number
+    if: needs.get_number.outputs.run_number == 0 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
+    uses: ./.github/workflows/self-past-caller.yml
+    with:
+      framework: pytorch
+      version: "1.13"
+      sha: ${{ github.sha }}
+    secrets: inherit
+
+  run_past_ci_pytorch_1-12:
+    name: PyTorch 1.12
+    needs: get_number
+    if: needs.get_number.outputs.run_number == 1 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
+    uses: ./.github/workflows/self-past-caller.yml
+    with:
+      framework: pytorch
+      version: "1.12"
+      sha: ${{ github.sha }}
+    secrets: inherit
+
+  run_past_ci_pytorch_1-11:
+    name: PyTorch 1.11
+    needs: get_number
+    if: needs.get_number.outputs.run_number == 2 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
+    uses: ./.github/workflows/self-past-caller.yml
+    with:
+      framework: pytorch
+      version: "1.11"
+      sha: ${{ github.sha }}
+    secrets: inherit
+
  run_past_ci_tensorflow_2-11:
    name: TensorFlow 2.11
    needs: get_number
--- a/.github/workflows/self-pr-slow-ci.yml
+++ b/.github/workflows/self-pr-slow-ci.yml
@ -0,0 +1,151 @@
+name: PR slow CI
+
+on:
+  pull_request:
+    paths:
+      - "src/transformers/models/*/modeling_*.py"
+      - "tests/**/test_*.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: yes
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: yes
+  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
+  # This token is created under the bot `hf-transformers-bot`.
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
+  CUDA_VISIBLE_DEVICES: 0,1
+
+jobs:
+  find_models_to_run:
+      runs-on: ubuntu-22.04
+      name: Find models to run slow tests
+      # Triggered only if the required label `run-slow` is added
+      if: ${{ contains(github.event.pull_request.labels.*.name, 'run-slow') }}
+      outputs:
+        models: ${{ steps.models_to_run.outputs.models }}
+      steps:
+        - uses: actions/checkout@v4
+          with:
+            fetch-depth: "0"
+            ref: ${{ github.event.pull_request.head.sha }}
+
+        - name: Get commit message
+          run: |
+            echo "commit_message=$(git show -s --format=%s)" >> $GITHUB_ENV
+
+        - name: Get models to run slow tests
+          run: |
+            echo "${{ env.commit_message }}"
+            python -m pip install GitPython
+            python utils/pr_slow_ci_models.py --commit_message "${{ env.commit_message }}" | tee output.txt
+            echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
+
+        - name: Models to run slow tests
+          id: models_to_run
+          run: |
+            echo "${{ env.models }}"
+            echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
+
+  run_models_gpu:
+      name: Run all tests for the model
+      # Triggered only `find_models_to_run` is triggered (label `run-slow` is added) which gives the models to run
+      # (either a new model PR or via a commit message)
+      if: ${{ needs.find_models_to_run.outputs.models != '[]' }}
+      needs: find_models_to_run
+      strategy:
+        fail-fast: false
+        matrix:
+          folders: ${{ fromJson(needs.find_models_to_run.outputs.models) }}
+          machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+      runs-on:
+        group: '${{ matrix.machine_type }}'
+      container:
+        image: huggingface/transformers-all-latest-gpu
+        options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      steps:
+      - name: Echo input and matrix info
+        shell: bash
+        run: |
+          echo "${{ matrix.folders }}"
+
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/merge && git checkout pull/${{ github.event.pull_request.number }}/merge
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . && python3 -m pip install --upgrade torch torchaudio torchvision
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Set `machine_type` for report and artifact names
+        working-directory: /transformers
+        shell: bash
+        run: |
+          echo "${{ matrix.machine_type }}"
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+            machine_type=single-gpu
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+            machine_type=multi-gpu
+          else
+            machine_type=${{ matrix.machine_type }}
+          fi
+          echo "$machine_type"
+          echo "machine_type=$machine_type" >> $GITHUB_ENV    
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: |
+          export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})"
+          echo $CUDA_VISIBLE_DEVICES
+          python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+
+      - name: Make sure report directory exists
+        shell: bash
+        run: |
+          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+
+      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
--- a/.github/workflows/self-push-amd-mi210-caller.yml
+++ b/.github/workflows/self-push-amd-mi210-caller.yml
@ -1,25 +1,25 @@
-name: Self-hosted runner (AMD mi210 CI caller)
-
-on:
-  #workflow_run:
-  #  workflows: ["Self-hosted runner (push-caller)"]
-  #  branches: ["main"]
-  #  types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi210
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi210
-    secrets: inherit
+name: Self-hosted runner (AMD mi210 CI caller)
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (push-caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_push_ci_caller*
+    paths:
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+      - "utils/**"
+
+jobs:
+  run_amd_ci:
+    name: AMD mi210
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
+    uses: ./.github/workflows/self-push-amd.yml
+    with:
+      gpu_flavor: mi210
+    secrets: inherit
--- a/.github/workflows/self-push-amd-mi250-caller.yml
+++ b/.github/workflows/self-push-amd-mi250-caller.yml
@ -1,25 +1,25 @@
-name: Self-hosted runner (AMD mi250 CI caller)
-
-on:
-  #workflow_run:
-  #  workflows: ["Self-hosted runner (push-caller)"]
-  #  branches: ["main"]
-  #  types: [completed]
-  push:
-    branches:
-      - run_amd_push_ci_caller*
-    paths:
-      - "src/**"
-      - "tests/**"
-      - ".github/**"
-      - "templates/**"
-      - "utils/**"
-
-jobs:
-  run_amd_ci:
-    name: AMD mi250
-    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
-    uses: ./.github/workflows/self-push-amd.yml
-    with:
-      gpu_flavor: mi250
-    secrets: inherit
+name: Self-hosted runner (AMD mi250 CI caller)
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (push-caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_push_ci_caller*
+    paths:
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+      - "utils/**"
+
+jobs:
+  run_amd_ci:
+    name: AMD mi250
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller')))
+    uses: ./.github/workflows/self-push-amd.yml
+    with:
+      gpu_flavor: mi250
+    secrets: inherit
--- a/.github/workflows/self-push-amd-mi300-caller.yml
+++ b/.github/workflows/self-push-amd-mi300-caller.yml
@ -1,10 +1,10 @@
 name: Self-hosted runner (AMD mi300 CI caller)

 on:
-  #workflow_run:
-  #  workflows: ["Self-hosted runner (push-caller)"]
-  #  branches: ["main"]
-  #  types: [completed]
+  workflow_run:
+    workflows: ["Self-hosted runner (push-caller)"]
+    branches: ["main"]
+    types: [completed]
  push:
    branches:
      - run_amd_push_ci_caller*
--- a/.github/workflows/self-scheduled-amd-mi210-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml
@ -1,55 +1,55 @@
-name: Self-hosted runner (AMD mi210 scheduled CI caller)
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi210
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi210
-    secrets: inherit
+name: Self-hosted runner (AMD mi210 scheduled CI caller)
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_scheduled_ci_caller*
+
+jobs:
+  model-ci:
+    name: Model CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_models_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi210
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi210
+    secrets: inherit
+
+  torch-pipeline:
+    name: Torch pipeline CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_pipelines_torch_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi210
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi210
+    secrets: inherit
+
+  example-ci:
+    name: Example CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_examples_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi210
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi210
+    secrets: inherit
+
+  deepspeed-ci:
+    name: DeepSpeed CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_torch_cuda_extensions_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi210
+      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi210
+    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi250-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml
@ -1,55 +1,55 @@
-name: Self-hosted runner (AMD mi250 scheduled CI caller)
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-amd"
-      runner: mi250
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi250
-    secrets: inherit
+name: Self-hosted runner (AMD mi250 scheduled CI caller)
+
+on:
+  workflow_run:
+    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
+    branches: ["main"]
+    types: [completed]
+  push:
+    branches:
+      - run_amd_scheduled_ci_caller*
+
+jobs:
+  model-ci:
+    name: Model CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_models_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi250
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi250
+    secrets: inherit
+
+  torch-pipeline:
+    name: Torch pipeline CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_pipelines_torch_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi250
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi250
+    secrets: inherit
+
+  example-ci:
+    name: Example CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_examples_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi250
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi250
+    secrets: inherit
+
+  deepspeed-ci:
+    name: DeepSpeed CI
+    uses: ./.github/workflows/self-scheduled-amd.yml
+    with:
+      job: run_torch_cuda_extensions_gpu
+      slack_report_channel: "#transformers-ci-daily-amd"
+      runner: mi250
+      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
+      ci_event: Scheduled CI (AMD) - mi250
+    secrets: inherit
--- a/.github/workflows/self-scheduled-amd.yml
+++ b/.github/workflows/self-scheduled-amd.yml
@ -0,0 +1,349 @@
+name: Self-hosted runner (scheduled-amd)
+
+# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the
+# CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes
+# us towards the limit of allowed jobs on GitHub Actions.
+
+on:
+  workflow_call:
+    inputs:
+      job:
+        required: true
+        type: string
+      slack_report_channel:
+        required: true
+        type: string
+      runner:
+        required: true
+        type: string
+      docker:
+        required: true
+        type: string
+      ci_event:
+        required: true
+        type: string
+
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: yes
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: yes
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  NUM_SLICES: 2
+
+# Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running.
+# This is done so that we avoid parallelizing the scheduled tests, to leave available
+# runners for the push CI that is running on the same machine.
+jobs:
+  check_runner_status:
+    name: Check Runner Status
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout transformers
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+
+      - name: Check Runner Status
+        run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+
+  check_runners:
+    name: Check Runners
+    needs: check_runner_status
+    strategy:
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    container:
+      image: huggingface/transformers-pytorch-amd-gpu
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+  setup:
+    if: contains(fromJSON('["run_models_gpu"]'), inputs.job)
+    name: Setup
+    needs: check_runners
+    strategy:
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    container:
+      image: huggingface/transformers-pytorch-amd-gpu
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    outputs:
+      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
+      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: |
+          git fetch && git checkout ${{ github.sha }}
+
+      - name: Cleanup
+        working-directory: /transformers
+        run: |
+          rm -rf tests/__pycache__
+          rm -rf tests/models/__pycache__
+          rm -rf reports
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - id: set-matrix
+        name: Identify models to test
+        working-directory: /transformers/tests
+        run: |
+          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
+          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
+
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+  run_models_gpu:
+    if: ${{ inputs.job == 'run_models_gpu' }}
+    name: Single GPU tests
+    needs: setup
+    strategy:
+      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
+    uses: ./.github/workflows/model_jobs_amd.yml
+    with:
+      folder_slices: ${{ needs.setup.outputs.folder_slices }}
+      machine_type: ${{ matrix.machine_type }}
+      slice_id: ${{ matrix.slice_id }}
+      runner: ${{ inputs.runner }}
+      docker: ${{ inputs.docker }}
+    secrets: inherit
+
+  run_pipelines_torch_gpu:
+    if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
+    name: PyTorch pipelines
+    needs: check_runners
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    container:
+      image: ${{ inputs.docker }}
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports
+
+  run_examples_gpu:
+    if: ${{ inputs.job == 'run_examples_gpu' }}
+    name: Examples directory
+    needs: check_runners
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu]
+    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    container:
+      image: ${{ inputs.docker }}
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run examples tests on GPU
+        working-directory: /transformers
+        run: |
+          pip install -r examples/pytorch/_tests_requirements.txt
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports
+
+  run_torch_cuda_extensions_gpu:
+    if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
+    name: Torch ROCm deepspeed tests
+    needs: check_runners
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [single-gpu, multi-gpu]
+    runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    container:
+      image: ${{ inputs.docker }}
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test"
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
+
+  send_results:
+    name: Slack Report
+    needs: [
+      check_runner_status,
+      check_runners,
+      setup,
+      run_models_gpu,
+      run_pipelines_torch_gpu,
+      run_examples_gpu,
+      run_torch_cuda_extensions_gpu
+    ]
+    if: ${{ always() }}
+    uses: ./.github/workflows/slack-report.yml
+    with:
+      job: ${{ inputs.job }}
+      # This would be `skipped` if `setup` is skipped.
+      setup_status: ${{ needs.setup.result }}
+      slack_report_channel: ${{ inputs.slack_report_channel }}
+      # This would be an empty string if `setup` is skipped.
+      folder_slices: ${{ needs.setup.outputs.folder_slices }}
+      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
+      ci_event: ${{ inputs.ci_event }}
+
+    secrets: inherit
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -366,7 +366,7 @@ jobs:
        run: |
          python3 -m pip uninstall -y deepspeed
          rm -rf DeepSpeed
-          git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
+          git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

      - name: NVIDIA-SMI
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@ -70,7 +70,7 @@ jobs:
        with:
          name: ci_results_${{ inputs.job }}
          path: ci_results_${{ inputs.job }}
-
+      
      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
      - name: Send message to Slack for quantization workflow
@ -90,7 +90,7 @@ jobs:
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
-          python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
+          python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" 

      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
      - name: Failure table artifacts
@ -98,4 +98,4 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: ci_results_${{ inputs.job }}
-          path: ci_results_${{ inputs.job }}
+          path: ci_results_${{ inputs.job }}
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@ -16,5 +16,3 @@ jobs:
          fetch-depth: 0
      - name: Secret Scanning
        uses: trufflesecurity/trufflehog@main
-        with:
-          extra_args: --results=verified,unknown
--- a/README.md
+++ b/README.md
@ -249,43 +249,23 @@ The model itself is a regular [Pytorch `nn.Module`](https://pytorch.org/docs/sta

 ### With pip

-This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+.
+This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+.

 You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).

 First, create a virtual environment with the version of Python you're going to use and activate it.

-**macOS/Linux**
-
-```python -m venv env
-source env/bin/activate
-```
-
-**Windows**
-
-``` python -m venv env
-env\Scripts\activate
-```
-
-To use 🤗 Transformers, you must install at least one of Flax, PyTorch, or TensorFlow. Refer to the official installation guides for platform-specific commands:
-
-[TensorFlow installation page](https://www.tensorflow.org/install/), 
-[PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) 
+Then, you will need to install at least one of Flax, PyTorch, or TensorFlow.
+Please refer to [TensorFlow installation page](https://www.tensorflow.org/install/), [PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) installation pages regarding the specific installation command for your platform.

 When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows:

-```
+```bash
 pip install transformers
 ```

 If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source).

-```
-git clone https://github.com/huggingface/transformers.git
-cd transformers
-pip install .
-```
-
 ### With conda

 🤗 Transformers can be installed using conda as follows:
--- a/awesome-transformers.md
+++ b/awesome-transformers.md
@ -15,7 +15,7 @@ to add it.

 Keywords: Open-source, LLaMa, GPT-J, instruction, assistant

-## [recommenders](https://github.com/recommenders-team/recommenders)
+## [recommenders](https://github.com/microsoft/recommenders)

 This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. It goes over several aspects required to build efficient recommendation systems: data preparation, modeling, evaluation, model selection & optimization, as well as operationalization

@ -39,15 +39,15 @@ MindsDB is a low-code ML platform, which automates and integrates several ML fra

 Keywords: Database, low-code, AI table

-## [langchain](https://github.com/langchain-ai/langchain)
+## [langchain](https://github.com/hwchase17/langchain)

-[langchain](https://github.com/langchain-ai/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
+[langchain](https://github.com/hwchase17/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.

 Keywords: LLMs, Large Language Models, Agents, Chains

-## [LlamaIndex](https://github.com/run-llama/llama_index)
+## [LlamaIndex](https://github.com/jerryjliu/llama_index)

-[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retreival mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
+[LlamaIndex](https://github.com/jerryjliu/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retreival mechanisms to perform different LLM tasks and obtain knowledge-augmented results.

 Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation 

@ -146,9 +146,9 @@ Keywords: Framework, simplicity, NLP

 Keywords: LLM, Agents, HF Hub

-## [transformers.js](https://github.com/huggingface/transformers.js/)
+## [transformers.js](https://xenova.github.io/transformers.js/)

-[transformers.js](https://github.com/huggingface/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
+[transformers.js](https://xenova.github.io/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.

 Keywords: Transformers, JavaScript, browser

@ -437,7 +437,7 @@ Keywords: DALL-E, Russian

 Keywords: Knowledge Extraction, Knowledge Graphs

-## [Nebuly](https://github.com/nebuly-ai/optimate)
+## [Nebuly](https://github.com/nebuly-ai/nebuly)

 Nebuly is the next-generation platform to monitor and optimize your AI costs in one place. The platform connects to all your AI cost sources (compute, API providers, AI software licenses, etc) and centralizes them in one place to give you full visibility on a model basis. The platform also provides optimization recommendations and a co-pilot model that can guide during the optimization process. The platform builds on top of the open-source tools allowing you to optimize the different steps of your AI stack to squeeze out the best possible cost performances.

--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -1,49 +0,0 @@
-# Benchmarks
-
-You might want to add new benchmarks.
-
-You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
-
-The expected function signature is the following:
-
-```py
-def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
-```
-
-## Writing metrics to the database
-
-`MetricRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
-
-cf [`llama.py`](./llama.py) to see an example of this in practice.
-
-```py
-from benchmarks_entrypoint import MetricsRecorder
-import psycopg2
-
-def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
-  metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
-  benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
-    # To collect device measurements
-    metrics_recorder.collect_device_measurements(
-        benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
-    )
-    # To collect your model measurements
-    metrics_recorder.collect_model_measurements(
-        benchmark_id,
-        {
-            "model_load_time": model_load_time,
-            "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-            "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-            "first_eager_generate_time_secs": first_eager_generate_time,
-            "second_eager_generate_time_secs": second_eager_generate_time,
-            "time_to_first_token_secs": time_to_first_token,
-            "time_to_second_token_secs": time_to_second_token,
-            "time_to_third_token_secs": time_to_third_token,
-            "time_to_next_token_mean_secs": mean_time_to_next_token,
-            "first_compile_generate_time_secs": first_compile_generate_time,
-            "second_compile_generate_time_secs": second_compile_generate_time,
-            "third_compile_generate_time_secs": third_compile_generate_time,
-            "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-        },
-    )
-```
--- a/benchmark/benchmarks_entrypoint.py
+++ b/benchmark/benchmarks_entrypoint.py
@ -1,144 +0,0 @@
-import argparse
-import importlib.util
-import logging
-import os
-from typing import Dict
-import psycopg2
-import sys
-
-from psycopg2.extras import Json
-from psycopg2.extensions import register_adapter
-
-
-register_adapter(dict, Json)
-
-
-class ImportModuleException(Exception):
-    pass
-
-
-class MetricsRecorder:
-    def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
-        self.conn = connection
-        self.conn.autocommit = True
-        self.logger = logger
-        self.branch = branch
-        self.commit_id = commit_id
-        self.commit_msg = commit_msg
-
-    def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
-        """
-        Creates a new benchmark, returns the benchmark id
-        """
-        # gpu_name: str, model_id: str
-        with self.conn.cursor() as cur:
-            cur.execute(
-                "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
-                (self.branch, self.commit_id, self.commit_msg, metadata),
-            )
-            benchmark_id = cur.fetchone()[0]
-            logger.debug(f"initialised benchmark #{benchmark_id}")
-            return benchmark_id
-
-    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
-        """
-        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
-        """
-        with self.conn.cursor() as cur:
-            cur.execute(
-                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
-                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
-            )
-        self.logger.debug(
-            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
-        )
-
-    def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
-        with self.conn.cursor() as cur:
-            cur.execute(
-                """
-                INSERT INTO model_measurements (
-                    benchmark_id,
-                    measurements
-                ) VALUES (%s, %s)
-                """,
-                (
-                    benchmark_id,
-                    measurements,
-                ),
-            )
-        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")
-
-    def close(self):
-        self.conn.close()
-
-
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-
-handler = logging.StreamHandler(sys.stdout)
-handler.setLevel(logging.INFO)
-formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
-handler.setFormatter(formatter)
-logger.addHandler(handler)
-
-
-def parse_arguments():
-    """
-    Parse command line arguments for the benchmarking CLI.
-    """
-    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
-
-    parser.add_argument(
-        "branch",
-        type=str,
-        help="The branch name on which the benchmarking is performed.",
-    )
-
-    parser.add_argument(
-        "commit_id",
-        type=str,
-        help="The commit hash on which the benchmarking is performed.",
-    )
-
-    parser.add_argument(
-        "commit_msg",
-        type=str,
-        help="The commit message associated with the commit, truncated to 70 characters.",
-    )
-
-    args = parser.parse_args()
-
-    return args.branch, args.commit_id, args.commit_msg
-
-
-def import_from_path(module_name, file_path):
-    try:
-        spec = importlib.util.spec_from_file_location(module_name, file_path)
-        module = importlib.util.module_from_spec(spec)
-        sys.modules[module_name] = module
-        spec.loader.exec_module(module)
-        return module
-    except Exception as e:
-        raise ImportModuleException(f"failed to load python module: {e}")
-
-
-if __name__ == "__main__":
-    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
-
-    branch, commit_id, commit_msg = parse_arguments()
-
-    for entry in os.scandir(benchmarks_folder_path):
-        try:
-            if not entry.name.endswith(".py"):
-                continue
-            if entry.path == __file__:
-                continue
-            logger.debug(f"loading: {entry.name}")
-            module = import_from_path(entry.name.split(".")[0], entry.path)
-            logger.info(f"runnning benchmarks in: {entry.name}")
-            module.run_benchmark(logger, branch, commit_id, commit_msg)
-        except ImportModuleException as e:
-            logger.error(e)
-        except Exception as e:
-            logger.error(f"error running benchmarks for {entry.name}: {e}")
--- a/benchmark/default.yml
+++ b/benchmark/default.yml
@ -1,10 +0,0 @@
-apiVersion: 1
-
-providers:
-  - name: 'Transformers Benchmarks'
-    orgId: 1
-    type: file
-    updateIntervalSeconds: 10
-    allowUiUpdates: true
-    options:
-      path: /etc/grafana/dashboards
--- a/benchmark/grafana_dashboard.json
+++ b/benchmark/grafana_dashboard.json
@ -30,7 +30,7 @@
      "title": "Go to data",
      "tooltip": "Go to data",
      "type": "link",
-      "url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
+      "url": "http://transformers-benchmarks.huggingface.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
    }
  ],
  "liveNow": true,
@ -77,7 +77,7 @@
            "properties": [
              {
                "id": "custom.width",
-                "value": 202
+                "value": 196
              }
            ]
          },
@ -101,7 +101,7 @@
            "properties": [
              {
                "id": "custom.width",
-                "value": 524
+                "value": 581
              }
            ]
          },
@ -113,19 +113,7 @@
            "properties": [
              {
                "id": "custom.width",
-                "value": 353
-              }
-            ]
-          },
-          {
-            "matcher": {
-              "id": "byName",
-              "options": "model_id"
-            },
-            "properties": [
-              {
-                "id": "custom.width",
-                "value": 216
+                "value": 379
              }
            ]
          }
@ -155,14 +143,12 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
-            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "type": "grafana-postgresql-datasource"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT commit_id as commit_id, commit_message, gpu_name, created_at AS date FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -320,14 +306,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -446,14 +431,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -581,14 +565,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -703,14 +686,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -825,14 +807,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -947,14 +928,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -1082,14 +1062,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -1204,14 +1183,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -1326,14 +1304,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -1448,14 +1425,13 @@
      "targets": [
        {
          "datasource": {
-            "default": true,
            "type": "grafana-postgresql-datasource",
-            "uid": "be28nkzirtb0gd"
+            "uid": "bdz2yss7sxo1sc"
          },
          "editorMode": "code",
          "format": "table",
          "rawQuery": true,
-          "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND gpu_name = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
          "refId": "A",
          "sql": {
            "columns": [
@ -1504,7 +1480,11 @@
      "id": 15,
      "panels": [
        {
-          "datasource": {},
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
          "fieldConfig": {
            "defaults": {
              "color": {
@ -1548,7 +1528,8 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                  },
                  {
                    "color": "red",
@ -1582,9 +1563,8 @@
          "targets": [
            {
              "datasource": {
-                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "be28nkzirtb0gd"
+                "uid": "bdz2yss7sxo1sc"
              },
              "editorMode": "code",
              "format": "table",
@ -1685,7 +1665,11 @@
          "type": "timeseries"
        },
        {
-          "datasource": {},
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
          "fieldConfig": {
            "defaults": {
              "color": {
@ -1729,7 +1713,8 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                  },
                  {
                    "color": "red",
@ -1763,9 +1748,8 @@
          "targets": [
            {
              "datasource": {
-                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "be28nkzirtb0gd"
+                "uid": "bdz2yss7sxo1sc"
              },
              "editorMode": "code",
              "format": "table",
@ -1866,7 +1850,11 @@
          "type": "timeseries"
        },
        {
-          "datasource": {},
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
          "fieldConfig": {
            "defaults": {
              "color": {
@ -1910,7 +1898,8 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                  },
                  {
                    "color": "red",
@ -1944,9 +1933,8 @@
          "targets": [
            {
              "datasource": {
-                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "be28nkzirtb0gd"
+                "uid": "bdz2yss7sxo1sc"
              },
              "editorMode": "code",
              "format": "table",
@ -2047,7 +2035,11 @@
          "type": "timeseries"
        },
        {
-          "datasource": {},
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
          "fieldConfig": {
            "defaults": {
              "color": {
@ -2091,7 +2083,8 @@
                "mode": "absolute",
                "steps": [
                  {
-                    "color": "green"
+                    "color": "green",
+                    "value": null
                  },
                  {
                    "color": "red",
@ -2125,9 +2118,8 @@
          "targets": [
            {
              "datasource": {
-                "default": true,
                "type": "grafana-postgresql-datasource",
-                "uid": "be28nkzirtb0gd"
+                "uid": "bdz2yss7sxo1sc"
              },
              "editorMode": "code",
              "format": "table",
@ -2232,6 +2224,7 @@
      "type": "row"
    }
  ],
+  "refresh": "",
  "schemaVersion": 39,
  "tags": [],
  "templating": {
@ -2243,7 +2236,6 @@
          "value": "main"
        },
        "datasource": {
-          "default": true,
          "type": "grafana-postgresql-datasource",
          "uid": "be28nkzirtb0gd"
        },
@ -2256,7 +2248,7 @@
        "name": "branch",
        "options": [],
        "query": "SELECT DISTINCT branch FROM benchmarks;",
-        "refresh": 1,
+        "refresh": 2,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
@ -2269,7 +2261,6 @@
          "value": "1729701492845"
        },
        "datasource": {
-          "default": true,
          "type": "grafana-postgresql-datasource",
          "uid": "be28nkzirtb0gd"
        },
@ -2290,11 +2281,10 @@
      {
        "current": {
          "selected": false,
-          "text": "1730393397577",
-          "value": "1730393397577"
+          "text": "1730120430069",
+          "value": "1730120430069"
        },
        "datasource": {
-          "default": true,
          "type": "grafana-postgresql-datasource",
          "uid": "be28nkzirtb0gd"
        },
@ -2322,16 +2312,15 @@
          "type": "grafana-postgresql-datasource",
          "uid": "be28nkzirtb0gd"
        },
-        "definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
-        "description": "",
+        "definition": "SELECT DISTINCT gpu_name FROM benchmarks;",
        "hide": 0,
        "includeAll": false,
        "label": "GPU",
        "multi": false,
        "name": "gpu_name",
        "options": [],
-        "query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
-        "refresh": 1,
+        "query": "SELECT DISTINCT gpu_name FROM benchmarks;",
+        "refresh": 2,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
@ -2339,7 +2328,7 @@
      },
      {
        "current": {
-          "selected": true,
+          "selected": false,
          "text": "10",
          "value": "10"
        },
@ -2370,6 +2359,6 @@
  "timezone": "browser",
  "title": "Transformers benchmarks",
  "uid": "fdz33iyzln9c0a",
-  "version": 10,
+  "version": 4,
  "weekStart": ""
 }
--- a/benchmark/grafana_datasource.yaml
+++ b/benchmark/grafana_datasource.yaml
@ -1,17 +0,0 @@
-apiVersion: 1
-datasources:
-  - name: grafana-postgresql-datasource
-    uid: be28nkzirtb0gd
-    type: postgres
-    url: $GRAFANA_POSTGRES_DATASOURCE_URL
-    user: $GRAFANA_POSTGRES_DATASOURCE_USER
-    secureJsonData:
-      password: $GRAFANA_POSTGRES_DATASOURCE_PWD
-    jsonData:
-      database: metrics
-      maxOpenConns: 100
-      maxIdleConns: 100
-      maxIdleConnsAuto: true
-      connMaxLifetime: 14400
-      postgresVersion: 1000
-      timescaledb: false
--- a/benchmark/init_db.sql
+++ b/benchmark/init_db.sql
@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS benchmarks (
  branch VARCHAR(255),
  commit_id VARCHAR(72),
  commit_message VARCHAR(70),
-  metadata jsonb,
+  gpu_name VARCHAR(255),
  created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
 );

--- a/benchmark/llama.py
+++ b/benchmark/llama.py
@ -1,25 +1,71 @@
-from logging import Logger
+import argparse
+import json
+import logging
 import os
+import sys
+from statistics import mean
 from threading import Event, Thread
 from time import perf_counter, sleep
 from typing import Optional
-from benchmarks_entrypoint import MetricsRecorder
 import gpustat
 import psutil
 import psycopg2
 import torch

 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
+from psycopg2.extras import Json
+from psycopg2.extensions import register_adapter


 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+handler = logging.StreamHandler(sys.stdout)
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+
 os.environ["TOKENIZERS_PARALLELISM"] = "1"
 torch.set_float32_matmul_precision("high")
+register_adapter(dict, Json)


-def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
+def parse_arguments():
+    """
+    Parse command line arguments for the benchmarking CLI.
+    """
+    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
+
+    parser.add_argument(
+        "branch",
+        type=str,
+        help="The branch name on which the benchmarking is performed.",
+    )
+
+    parser.add_argument(
+        "commit_id",
+        type=str,
+        help="The commit hash on which the benchmarking is performed.",
+    )
+
+    parser.add_argument(
+        "commit_msg",
+        type=str,
+        help="The commit message associated with the commit, truncated to 70 characters.",
+    )
+
+    args = parser.parse_args()
+
+    return args.branch, args.commit_id, args.commit_msg
+
+
+def collect_metrics(benchmark_id, continue_metric_collection):
    p = psutil.Process(os.getpid())
+    conn = psycopg2.connect("dbname=metrics")
+    cur = conn.cursor()
    while not continue_metric_collection.is_set():
        with p.oneshot():
            cpu_util = p.cpu_percent()
@ -27,41 +73,47 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
        gpu_stats = gpustat.GPUStatCollection.new_query()
        gpu_util = gpu_stats[0]["utilization.gpu"]
        gpu_mem_megabytes = gpu_stats[0]["memory.used"]
-        metrics_recorder.collect_device_measurements(
-            benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
+        cur.execute(
+            "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
+            (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
        )
        sleep(0.01)
+        conn.commit()
+    conn.close()


-def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
+def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
    continue_metric_collection = Event()
    metrics_thread = None
-    model_id = "meta-llama/Llama-2-7b-hf"
-    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
    try:
        gpu_stats = gpustat.GPUStatCollection.new_query()
        gpu_name = gpu_stats[0]["name"]
-        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
-        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
-        metrics_thread = Thread(
-            target=collect_metrics,
-            args=[benchmark_id, continue_metric_collection, metrics_recorder],
+        conn = psycopg2.connect("dbname=metrics")
+        cur = conn.cursor()
+        cur.execute(
+            "INSERT INTO benchmarks (branch, commit_id, commit_message, gpu_name) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
+            (branch, commit_id, commit_msg, gpu_name),
        )
+        conn.commit()
+        benchmark_id = cur.fetchone()[0]
+        logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
+        metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
        metrics_thread.start()
        logger.info("started background thread to fetch device metrics")

        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling

        device = "cuda"
+        ckpt = "meta-llama/Llama-2-7b-hf"

        logger.info("downloading weights")
        # This is to avoid counting download in model load time measurement
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
+        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
        logger.info("loading model")
        start = perf_counter()
        model = AutoModelForCausalLM.from_pretrained(
-            model_id, torch_dtype=torch.float16, generation_config=gen_config
+            ckpt, torch_dtype=torch.float16, generation_config=gen_config
        ).eval()
        model.to(device)
        torch.cuda.synchronize()
@ -69,7 +121,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
        model_load_time = end - start
        logger.info(f"loaded model in: {model_load_time}s")

-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(ckpt)

        prompt = "Why dogs are so cute?"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
@ -316,27 +368,41 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
            logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")

-        metrics_recorder.collect_model_measurements(
-            benchmark_id,
-            {
-                "model_load_time": model_load_time,
-                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-                "first_eager_generate_time_secs": first_eager_generate_time,
-                "second_eager_generate_time_secs": second_eager_generate_time,
-                "time_to_first_token_secs": time_to_first_token,
-                "time_to_second_token_secs": time_to_second_token,
-                "time_to_third_token_secs": time_to_third_token,
-                "time_to_next_token_mean_secs": mean_time_to_next_token,
-                "first_compile_generate_time_secs": first_compile_generate_time,
-                "second_compile_generate_time_secs": second_compile_generate_time,
-                "third_compile_generate_time_secs": third_compile_generate_time,
-                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-            },
+        cur.execute(
+            """
+            INSERT INTO model_measurements (
+                benchmark_id,
+                measurements
+            ) VALUES (%s, %s)
+            """,
+            (
+                benchmark_id,
+                {
+                    "model_load_time": model_load_time,
+                    "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
+                    "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
+                    "first_eager_generate_time_secs": first_eager_generate_time,
+                    "second_eager_generate_time_secs": second_eager_generate_time,
+                    "time_to_first_token_secs": time_to_first_token,
+                    "time_to_second_token_secs": time_to_second_token,
+                    "time_to_third_token_secs": time_to_third_token,
+                    "time_to_next_token_mean_secs": mean_time_to_next_token,
+                    "first_compile_generate_time_secs": first_compile_generate_time,
+                    "second_compile_generate_time_secs": second_compile_generate_time,
+                    "third_compile_generate_time_secs": third_compile_generate_time,
+                    "fourth_compile_generate_time_secs": fourth_compile_generate_time,
+                },
+            ),
        )
+        conn.commit()
+        conn.close()
    except Exception as e:
        logger.error(f"Caught exception: {e}")
    continue_metric_collection.set()
    if metrics_thread is not None:
        metrics_thread.join()
-    metrics_recorder.close()
+
+
+if __name__ == "__main__":
+    branch, commit_id, commit_msg = parse_arguments()
+    run_benchmark(branch, commit_id, commit_msg, num_tokens_to_generate=20)
--- a/docker/consistency.dockerfile
+++ b/docker/consistency.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 USER root
 ARG REF=main
--- a/docker/custom-tokenizers.dockerfile
+++ b/docker/custom-tokenizers.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
--- a/docker/examples-tf.dockerfile
+++ b/docker/examples-tf.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git
--- a/docker/examples-torch.dockerfile
+++ b/docker/examples-torch.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 USER root
 RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
--- a/docker/exotic-models.dockerfile
+++ b/docker/exotic-models.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/jax-light.dockerfile
+++ b/docker/jax-light.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/pipeline-tf.dockerfile
+++ b/docker/pipeline-tf.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/pipeline-torch.dockerfile
+++ b/docker/pipeline-torch.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/quality.dockerfile
+++ b/docker/quality.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/tf-light.dockerfile
+++ b/docker/tf-light.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/torch-jax-light.dockerfile
+++ b/docker/torch-jax-light.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/torch-light.dockerfile
+++ b/docker/torch-light.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
--- a/docker/torch-tf-light.dockerfile
+++ b/docker/torch-tf-light.dockerfile
@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 RUN echo ${REF}
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
 # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
 # to be used as arguments for docker build (so far).

-ARG PYTORCH='2.6.0'
+ARG PYTORCH='2.5.1'
 # (not always a valid torch version)
 ARG INTEL_TORCH_EXT='2.3.0'
 # Example: `cu102`, `cu113`, etc.
@ -65,9 +65,6 @@ RUN python3 -m pip install --no-cache-dir python-Levenshtein
 # For `FastSpeech2ConformerTokenizer` tokenizer
 RUN python3 -m pip install --no-cache-dir g2p-en

-# For Some bitsandbytes tests
-RUN python3 -m pip install --no-cache-dir einops
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
--- a/docker/transformers-doc-builder/Dockerfile
+++ b/docker/transformers-doc-builder/Dockerfile
@ -8,9 +8,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip && python3 -m pip instal
 RUN apt-get -y update && apt-get install -y libsndfile1-dev && apt install -y tesseract-ocr

 # Torch needs to be installed before deepspeed
-# RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]
-RUN python3 -m pip uninstall -y deepspeed
-RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
+RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed]

 RUN python3 -m pip install --no-cache-dir torchvision git+https://github.com/facebookresearch/detectron2.git pytesseract
 RUN python3 -m pip install -U "itsdangerous<2.1.0"
--- a/docker/transformers-past-gpu/Dockerfile
+++ b/docker/transformers-past-gpu/Dockerfile
@ -48,8 +48,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
 # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
 RUN python3 -m pip uninstall -y deepspeed
 # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
-# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
-# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
+# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
+# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
 #    DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

 RUN python3 -m pip install -U "itsdangerous<2.1.0"
--- a/docker/transformers-pytorch-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-amd-gpu/Dockerfile
@ -1,18 +1,17 @@
-FROM rocm/dev-ubuntu-22.04:6.2.4
+FROM rocm/dev-ubuntu-22.04:6.0.2
+# rocm/pytorch has no version with 2.1.0
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive

 RUN apt update && \
-    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \
+    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*

-RUN git lfs install
-
 RUN python3 -m pip install --no-cache-dir --upgrade pip numpy

-RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2
+RUN python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0

 RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"

@ -31,5 +30,5 @@ RUN python3 -m pip uninstall -y tensorflow flax
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop

-# Remove nvml and nvidia-ml-py as it is not compatible with ROCm. apex is not tested on NVIDIA either.
-RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
+# Remove nvml as it is not compatible with ROCm. apex is not tested on NVIDIA either.
+RUN python3 -m pip uninstall py3nvml pynvml apex -y
--- a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
@ -1,11 +1,11 @@
-FROM rocm/dev-ubuntu-22.04:6.2.4
+FROM rocm/dev-ubuntu-22.04:5.6
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
-ARG PYTORCH='2.5.1'
-ARG TORCH_VISION='0.20.0'
-ARG TORCH_AUDIO='2.5.0'
-ARG ROCM='6.2'
+ARG PYTORCH='2.1.1'
+ARG TORCH_VISION='0.16.1'
+ARG TORCH_AUDIO='2.1.1'
+ARG ROCM='5.6'

 RUN apt update && \
    apt install -y --no-install-recommends \
@ -45,4 +45,4 @@ RUN cd transformers && python3 setup.py develop
 RUN python3 -c "from deepspeed.launcher.runner import main"

 # Remove nvml as it is not compatible with ROCm
-RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
+RUN python3 -m pip uninstall py3nvml pynvml -y
--- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
@ -1,5 +1,5 @@
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
-FROM nvcr.io/nvidia/pytorch:23.11-py3
+FROM nvcr.io/nvidia/pytorch:23.04-py3
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
--- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
@ -34,8 +34,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
 # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
 RUN python3 -m pip uninstall -y deepspeed
 # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
-# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
-# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
+# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
+# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
 #    DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

 ## For `torchdynamo` tests
--- a/docker/transformers-pytorch-gpu/Dockerfile
+++ b/docker/transformers-pytorch-gpu/Dockerfile
@ -11,7 +11,7 @@ ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

 # If set to nothing, will install the latest version
-ARG PYTORCH='2.6.0'
+ARG PYTORCH='2.5.1'
 ARG TORCH_VISION=''
 ARG TORCH_AUDIO=''
 # Example: `cu102`, `cu113`, etc.
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@ -50,12 +50,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
 # Add aqlm for quantization testing
 RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2

-# Add vptq for quantization testing
-RUN python3 -m pip install --no-cache-dir vptq
-
-# Add spqr for quantization testing
-RUN python3 -m pip install --no-cache-dir spqr_quant[gpu]
-
 # Add hqq for quantization testing
 RUN python3 -m pip install --no-cache-dir hqq

@ -72,10 +66,6 @@ RUN python3 -m pip install --no-cache-dir optimum-quanto
 # Add eetq for quantization testing
 RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git

-# Add flute-kernel and fast_hadamard_transform for quantization testing
-RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118
-RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
--- a/docs/source/ar/_toctree.yml
+++ b/docs/source/ar/_toctree.yml
@ -30,26 +30,26 @@
  - local: conversations
    title: الدردشة مع المحولات
  title: البرامج التعليمية
- sections:
-  - isExpanded: false
-    sections:
-    - local: tasks/sequence_classification
-      title: تصنيف النصوص
-    - local: tasks/token_classification
-      title: تصنيف الرموز
-    - local: tasks/question_answering
-      title: الإجابة على الأسئلة
-    - local: tasks/language_modeling
-      title: نمذجة اللغة السببية
-    - local: tasks/masked_language_modeling
-      title: نمذجة اللغة المقنعة
-    - local: tasks/translation
-      title: الترجمة
-    - local: tasks/summarization
-      title: التلخيص
-    - local: tasks/multiple_choice
-      title: الاختيار المتعدد
-    title: معالجة اللغات الطبيعية
+# - sections:
+#   - isExpanded: false
+#     sections:
+#     - local: tasks/sequence_classification
+#       title: تصنيف النصوص
+#     - local: tasks/token_classification
+#       title: تصنيف الرموز
+#     - local: tasks/question_answering
+#       title: الإجابة على الأسئلة
+#     - local: tasks/language_modeling
+#       title: نمذجة اللغة السببية
+#     - local: tasks/masked_language_modeling
+#       title: نمذجة اللغة المقنعة
+#     - local: tasks/translation
+#       title: الترجمة
+#     - local: tasks/summarization
+#       title: التلخيص
+#     - local: tasks/multiple_choice
+#       title: الاختيار المتعدد
+#     title: معالجة اللغات الطبيعية
 #   - isExpanded: false
 #     sections:
 #     - local: tasks/audio_classification
@ -107,10 +107,10 @@
 #     - local: tasks/prompting
 #       title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة
 #     title: الإرشاد
-  title: أدلة المهام
+#   title: أدلة المهام
 - sections:
  - local: fast_tokenizers
-    title: استخدم مجزئيات النصوص السريعة من 🤗 Tokenizers
+    title: استخدم مجزئيات النصوص السريعة من 🤗 Tokenizers 
  - local: multilingual
    title: الاستدلال باستخدام نماذج متعددة اللغات
  - local: create_a_model
@ -129,20 +129,16 @@
    title: التصدير إلى TFLite
  - local: torchscript
    title: التصدير إلى TorchScript
+  - local: benchmarks
+    title: المعايير
  - local: notebooks
    title: دفاتر الملاحظات مع الأمثلة
-  - local: community
-    title: موارد المجتمع
+#   - local: community
+#     title: موارد المجتمع
  - local: troubleshooting
    title: استكشاف الأخطاء وإصلاحها
  - local: gguf
    title: التوافق مع ملفات GGUF
-  - local: tiktoken
-    title: التوافق مع ملفات TikToken
-  - local: modular_transformers
-    title: الوحدات النمطية في `transformers`
-  - local: how_to_hack_models
-    title: اختراق النموذج (الكتابة فوق فئة لاستخدامك)
  title: أدلة المطورين
 # - sections:
 #   - local: quantization/overview
@ -155,8 +151,6 @@
 #     title: AWQ
 #   - local: quantization/aqlm
 #     title: AQLM
-#   - local: quantization/vptq
-#     title: VPTQ
 #   - local: quantization/quanto
 #     title: Quanto
 #   - local: quantization/eetq
@ -881,7 +875,7 @@
 #     - local: internal/pipelines_utils
 #       title: مرافق خطوط الأنابيب
 #     - local: internal/tokenization_utils
-#       title: مرافق مقسم النصوص
+#       title: مرافق مقسم النصوص 
 #     - local: internal/trainer_utils
 #       title: مرافق المدرب
 #     - local: internal/generation_utils
--- a/docs/source/ar/benchmarks.md
+++ b/docs/source/ar/benchmarks.md
@ -0,0 +1,352 @@
+# معايير الأداء
+<Tip warning={true}>
+
+أدوات قياس الأداء من Hugging Face أصبحت قديمة،ويُنصح باستخدام مكتبات خارجية لقياس سرعة وتعقيد الذاكرة لنماذج Transformer.
+
+</Tip>
+
+[[open-in-colab]]
+
+لنلق نظرة على كيفية تقييم أداء نماذج 🤗 Transformers، وأفضل الممارسات، ومعايير الأداء المتاحة بالفعل.
+
+يُمكن العثور على دفتر ملاحظات يشرح بالتفصيل كيفية قياس أداء نماذج 🤗 Transformers [هنا](https://github.com/huggingface/notebooks/tree/main/examples/benchmark.ipynb).
+
+## كيفية قياس أداء نماذج 🤗 Transformers
+
+تسمح الفئتان [`PyTorchBenchmark`] و [`TensorFlowBenchmark`] بتقييم أداء نماذج 🤗 Transformers بمرونة. تتيح لنا فئات التقييم قياس الأداء قياس _الاستخدام الأقصى للذاكرة_ و _الوقت اللازم_ لكل من _الاستدلال_ و _التدريب_.
+
+<Tip>
+
+هنا، ييُعرَّف _الاستدلال_ بأنه تمريرة أمامية واحدة، ويتم تعريف _التدريب_ بأنه تمريرة أمامية واحدة وتمريرة خلفية واحدة.
+
+</Tip>
+
+تتوقع فئات تقييم الأداء [`PyTorchBenchmark`] و [`TensorFlowBenchmark`] كائنًا من النوع [`PyTorchBenchmarkArguments`] و [`TensorFlowBenchmarkArguments`]، على التوالي، للتنفيذ. [`PyTorchBenchmarkArguments`] و [`TensorFlowBenchmarkArguments`] هي فئات بيانات وتحتوي على جميع التكوينات ذات الصلة لفئة تقييم الأداء المقابلة. في المثال التالي، يتم توضيح كيفية تقييم أداء نموذج BERT من النوع _bert-base-cased_.
+
+<frameworkcontent>
+<pt>
+  
+```py
+>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
+
+>>> args = PyTorchBenchmarkArguments(models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512])
+>>> benchmark = PyTorchBenchmark(args)
+```
+</pt>
+<tf>
+  
+```py
+>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments
+
+>>> args = TensorFlowBenchmarkArguments(
+...     models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
+... )
+>>> benchmark = TensorFlowBenchmark(args)
+```
+</tf>
+</frameworkcontent>
+
+هنا، يتم تمرير ثلاثة معامﻻت إلى فئات بيانات حجة قياس الأداء، وهي `models` و `batch_sizes` و `sequence_lengths`. المعامل `models` مطلوبة وتتوقع `قائمة` من بمعرّفات النموذج من [مركز النماذج](https://huggingface.co/models) تحدد معامﻻت القائمة `batch_sizes` و `sequence_lengths` حجم `input_ids` الذي يتم قياس أداء النموذج عليه. هناك العديد من المعلمات الأخرى التي يمكن تكوينها عبر فئات بيانات معال قياس الأداء. لمزيد من التفاصيل حول هذه المعلمات، يمكنك إما الرجوع مباشرة إلى الملفات `src/transformers/benchmark/benchmark_args_utils.py`، `src/transformers/benchmark/benchmark_args.py` (لـ PyTorch) و `src/transformers/benchmark/benchmark_args_tf.py` (لـ Tensorflow). أو، بدلاً من ذلك، قم بتشغيل أوامر shell التالية من المجلد الرئيسي لطباعة قائمة وصفية بجميع المعلمات القابلة للتكوين لـ PyTorch و Tensorflow على التوالي.
+
+<frameworkcontent>
+<pt>
+  
+```bash
+python examples/pytorch/benchmarking/run_benchmark.py --help
+```
+
+يُمكن ببساطة تشغيل كائن التقييم الذي تم تهيئته عن طريق استدعاء `benchmark.run()`.
+
+```py
+>>> results = benchmark.run()
+>>> print(results)
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length     Time in s                  
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             0.006     
+google-bert/bert-base-uncased          8               32            0.006     
+google-bert/bert-base-uncased          8              128            0.018     
+google-bert/bert-base-uncased          8              512            0.088     
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length    Memory in MB 
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             1227
+google-bert/bert-base-uncased          8               32            1281
+google-bert/bert-base-uncased          8              128            1307
+google-bert/bert-base-uncased          8              512            1539
+--------------------------------------------------------------------------------
+
+====================        ENVIRONMENT INFORMATION         ====================
+
+- transformers_version: 2.11.0
+- framework: PyTorch
+- use_torchscript: False
+- framework_version: 1.4.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 08:58:43.371351
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</pt>
+<tf>
+  
+```bash
+python examples/tensorflow/benchmarking/run_benchmark_tf.py --help
+```
+
+يُمكن بعد ذلك تشغيل كائن قياس الأداء الذي تم تهيئته عن طريق استدعاء `benchmark.run()`.
+
+```py
+>>> results = benchmark.run()
+>>> print(results)
+>>> results = benchmark.run()
+>>> print(results)
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length     Time in s                  
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             0.005
+google-bert/bert-base-uncased          8               32            0.008
+google-bert/bert-base-uncased          8              128            0.022
+google-bert/bert-base-uncased          8              512            0.105
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length    Memory in MB 
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             1330
+google-bert/bert-base-uncased          8               32            1330
+google-bert/bert-base-uncased          8              128            1330
+google-bert/bert-base-uncased          8              512            1770
+--------------------------------------------------------------------------------
+
+====================        ENVIRONMENT INFORMATION         ====================
+
+- transformers_version: 202.11.0
+- framework: Tensorflow
+- use_xla: False
+- framework_version: 2.2.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 09:26:35.617317
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</tf>
+</frameworkcontent>
+
+بشكل افتراضي، يتم تقييم _الوقت_ و _الذاكرة المطلوبة_ لـ _الاستدلال_. في مثال المخرجات أعلاه، يُظهر القسمان الأولان النتيجة المقابلة لـ _وقت الاستدلال_ و _ذاكرة الاستدلال_. بالإضافة إلى ذلك، يتم طباعة جميع المعلومات ذات الصلة حول بيئة الحوسبة، على سبيل المثال نوع وحدة معالجة الرسومات (GPU)، والنظام، وإصدارات المكتبة، وما إلى ذلك، في القسم الثالث تحت _معلومات البيئة_. يمكن حفظ هذه المعلومات بشكل اختياري في ملف _.csv_ عند إضافة المعامل `save_to_csv=True` إلى [`PyTorchBenchmarkArguments`] و [`TensorFlowBenchmarkArguments`] على التوالي. في هذه الحالة، يتم حفظ كل قسم في ملف _.csv_ منفصل. يمكن اختيارًا تحديد مسار كل ملف _.csv_ عبر فئات بيانات معامل قياس الأداء.
+
+بدلاً من تقييم النماذج المدربة مسبقًا عبر معرّف النموذج، على سبيل المثال `google-bert/bert-base-uncased`، يُمكن للمستخدم بدلاً من ذلك قياس أداء تكوين عشوائي لأي فئة نموذج متاحة. في هذه الحالة، يجب إدراج "قائمة" من التكوينات مع معامل قياس الأداء كما هو موضح أدناه.
+
+<frameworkcontent>
+<pt>
+  
+```py
+>>> from transformers import PyTorchBenchmark، PyTorchBenchmarkArguments، BertConfig
+
+>>> args = PyTorchBenchmarkArguments(
+...     models=["bert-base"، "bert-384-hid"، "bert-6-lay"]، batch_sizes=[8]، sequence_lengths=[8، 32، 128، 512]
+... )
+>>> config_base = BertConfig()
+>>> config_384_hid = BertConfig(hidden_size=384)
+>>> config_6_lay = BertConfig(num_hidden_layers=6)
+
+>>> benchmark = PyTorchBenchmark(args، configs=[config_base، config_384_hid، config_6_lay])
+>>> benchmark.run()
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length       Time in s                  
+--------------------------------------------------------------------------------
+bert-base                  8              128            0.006
+bert-base                  8              512            0.006
+bert-base                  8              128            0.018     
+bert-base                  8              512            0.088     
+bert-384-hid              8               8             0.006     
+bert-384-hid              8               32            0.006     
+bert-384-hid              8              128            0.011     
+bert-384-hid              8              512            0.054     
+bert-6-lay                 8               8             0.003     
+bert-6-lay                 8               32            0.004     
+bert-6-lay                 8              128            0.009     
+bert-6-lay                 8              512            0.044
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length      Memory in MB
+## نتائج اختبار الأداء
+
+في هذا القسم، يتم قياس _وقت الاستدلال_ و _الذاكرة المطلوبة_ للاستدلال، لمختلف تكوينات `BertModel`. يتم عرض النتائج في جدول، مع تنسيق مختلف قليلاً لكل من PyTorch و TensorFlow.
+
+--------------------------------------------------------------------------------
+| اسم النموذج | حجم الدفعة | طول التسلسل | الذاكرة بالميغابايت |
+--------------------------------------------------------------------------------
+| bert-base | 8 | 8 | 1277 |
+| bert-base | 8 | 32 | 1281 |
+| bert-base | 8 | 128 | 1307 |
+| bert-base | 8 | 512 | 1539 |
+| bert-384-hid | 8 | 8 | 1005 |
+| bert-384-hid | 8 | 32 | 1027 |
+| bert-384-hid | 8 | 128 | 1035 |
+| bert-384-hid | 8 | 512 | 1255 |
+| bert-6-lay | 8 | 8 | 1097 |
+| bert-6-lay | 8 | 32 | 1101 |
+| bert-6-lay | 8 | 128 | 1127 |
+| bert-6-lay | 8 | 512 | 1359 |
+--------------------------------------------------------------------------------
+
+==================== معلومات البيئة ====================
+
+- transformers_version: 2.11.0
+- framework: PyTorch
+- use_torchscript: False
+- framework_version: 1.4.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 09:35:25.143267
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</pt>
+<tf>
+  
+```py
+>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig
+
+>>> args = TensorFlowBenchmarkArguments(
+...     models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
+... )
+>>> config_base = BertConfig()
+>>> config_384_hid = BertConfig(hidden_size=384)
+>>> config_6_lay = BertConfig(num_hidden_layers=6)
+
+>>> benchmark = TensorFlowBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
+>>> benchmark.run()
+==================== نتائج السرعة في الاستدلال ====================
+--------------------------------------------------------------------------------
+| اسم النموذج | حجم الدفعة | طول التسلسل | الوقت بالثانية |
+--------------------------------------------------------------------------------
+| bert-base | 8 | 8 | 0.005 |
+| bert-base | 8 | 32 | 0.008 |
+| bert-base | 8 | 128 | 0.022 |
+| bert-base | 8 | 512 | 0.106 |
+| bert-384-hid | 8 | 8 | 0.005 |
+| bert-384-hid | 8 | 32 | 0.007 |
+| bert-384-hid | 8 | 128 | 0.018 |
+| bert-384-hid | 8 | 512 | 0.064 |
+| bert-6-lay | 8 | 8 | 0.002 |
+| bert-6-lay | 8 | 32 | 0.003 |
+| bert-6-lay | 8 | 128 | 0.0011 |
+| bert-6-lay | 8 | 512 | 0.074 |
+--------------------------------------------------------------------------------
+
+==================== نتائج الذاكرة في الاستدلال ====================
+--------------------------------------------------------------------------------
+| اسم النموذج | حجم الدفعة | طول التسلسل | الذاكرة بالميغابايت |
+--------------------------------------------------------------------------------
+| اسم النموذج | حجم الدفعة | طول التسلسل | الذاكرة بالميغابايت |
+--------------------------------------------------------------------------------
+| bert-base | 8 | 8 | 1330 |
+| bert-base | 8 | 32 | 1330 |
+| bert-base | 8 | 128 | 1330 |
+| bert-base | 8 | 512 | 1770 |
+| bert-384-hid | 8 | 8 | 1330 |
+| bert-384-hid | 8 | 32 | 1330 |
+| bert-384-hid | 8 | 128 | 1330 |
+| bert-384-hid | 8 | 512 | 1540 |
+| bert-6-lay | 8 | 8 | 1330 |
+| bert-6-lay | 8 | 32 | 1330 |
+| bert-6-lay | 8 | 128 | 1330 |
+| bert-6-lay | 8 | 512 | 1540 |
+--------------------------------------------------------------------------------
+
+==================== معلومات البيئة ====================
+
+- transformers_version: 2.11.0
+- framework: Tensorflow
+- use_xla: False
+- framework_version: 2.2.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 09:38:15.487125
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</tf>
+</frameworkcontent>
+
+مرة أخرى، يتم قياس _وقت الاستدلال_ و _الذاكرة المطلوبة_ للاستدلال، ولكن هذه المرة لتكوينات مخصصة لـ `BertModel`. يمكن أن تكون هذه الميزة مفيدة بشكل خاص عند اتخاذ قرار بشأن التكوين الذي يجب تدريب النموذج عليه.
+
+## أفضل الممارسات في اختبار الأداء
+
+يسرد هذا القسم بعض أفضل الممارسات التي يجب مراعاتها عند إجراء اختبار الأداء لنموذج ما.
+
+- حالياً، يتم دعم اختبار الأداء على جهاز واحد فقط. عند إجراء الاختبار على وحدة معالجة الرسوميات (GPU)، يوصى بأن يقوم المستخدم بتحديد الجهاز الذي يجب تشغيل التعليمات البرمجية عليه من خلال تعيين متغير البيئة `CUDA_VISIBLE_DEVICES` في الشل، على سبيل المثال `export CUDA_VISIBLE_DEVICES=0` قبل تشغيل التعليمات البرمجية.
+- يجب تعيين الخيار `no_multi_processing` إلى `True` فقط لأغراض الاختبار والتصحيح. ولضمان قياس الذاكرة بدقة، يوصى بتشغيل كل اختبار ذاكرة في عملية منفصلة والتأكد من تعيين `no_multi_processing` إلى `True`.
+- يجب دائمًا ذكر معلومات البيئة عند مشاركة نتائج تقييم النموذج. يُمكن أن تختلف النتائج اختلافًا كبيرًا بين أجهزة GPU المختلفة وإصدارات المكتبات، وما إلى ذلك، لذلك فإن نتائج الاختبار بمفردها ليست مفيدة جدًا للمجتمع.
+
+## مشاركة نتائج اختبار الأداء الخاص بك
+
+في السابق، تم إجراء اختبار الأداء لجميع النماذج الأساسية المتاحة (10 في ذلك الوقت) لقياس _وقت الاستدلال_، عبر العديد من الإعدادات المختلفة: باستخدام PyTorch، مع TorchScript وبدونها، باستخدام TensorFlow، مع XLA وبدونه. تم إجراء جميع هذه الاختبارات على وحدات المعالجة المركزية (CPU) (باستثناء XLA TensorFlow) ووحدات معالجة الرسوميات (GPU).
+
+يتم شرح هذا النهج بالتفصيل في [منشور المدونة هذا](https://medium.com/huggingface/benchmarking-transformers-pytorch-and-tensorflow-e2917fb891c2) وتتوفر النتائج [هنا](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing).
+
+مع أدوات اختبار الأداء الجديدة، أصبح من الأسهل من أي وقت مضى مشاركة نتائج اختبار الأداء الخاص بك مع المجتمع:
+
+- [نتائج اختبار الأداء في PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/benchmarking/README.md).
+- [نتائج اختبار الأداء في TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/benchmarking/README.md).
--- a/docs/source/ar/community.md
+++ b/docs/source/ar/community.md
@ -1,66 +0,0 @@
-# مجتمع المطورين
-
-هذه الصفحة تجمع الموارد حول 🤗 Transformers التي طورها المجتمع.
-
-## موارد المجتمع:
-
-| المصدر     |      الوصف      |      المؤلف      |
-|:----------|:-------------|------:|
-| [Hugging Face Transformers Glossary Flashcards](https://www.darigovresearch.com/huggingface-transformers-glossary-flashcards) | مجموعة من البطاقات التعليمية القائمة على [Transformers Docs Glossary](glossary) والتي تم وضعها في شكل يمكن تعلمه/مراجعته بسهولة باستخدام [Anki](https://apps.ankiweb.net/) وهو تطبيق مفتوح المصدر متعدد المنصات مصمم خصيصًا للاحتفاظ بالمعرفة على المدى الطويل. شاهد هذا [فيديو تمهيدي حول كيفية استخدام البطاقات التعليمية](https://www.youtube.com/watch?v=Dji_7PILrw). | [Darigov Research](https://www.darigovresearch.com/) |
-
-## دفاتر ملاحظات المجتمع:
-
-| الدفتر     |      الوصف      |      المؤلف      |      |
-|:----------|:-------------|:-------------|------:|
-| [Fine-tune a pre-trained Transformer to generate lyrics](https://github.com/AlekseyKorshuk/huggingartists) | كيفية توليد كلمات الأغاني على غرار فنانك المفضل من خلال ضبط نموذج GPT-2 |  [Aleksey Korshuk](https://github.com/AlekseyKorshuk) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AlekseyKorshuk/huggingartists/blob/master/huggingartists-demo.ipynb) |
-| [Train T5 in Tensorflow 2](https://github.com/snapthat/TF-T5-text-to-text) | كيفية تدريب T5 لأي مهمة باستخدام Tensorflow 2. يوضح هذا الدفتر مهمة السؤال والجواب المنفذة في Tensorflow 2 باستخدام SQUAD | [Muhammad Harris](https://github.com/HarrisDePerceptron) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/snapthat/TF-T5-text-to-text/blob/master/snapthatT5/notebooks/TF-T5-Datasets%20Training.ipynb) |
-| [Train T5 on TPU](https://github.com/patil-suraj/exploring-T5/blob/master/T5_on_TPU.ipynb)  | كيفية تدريب T5 على SQUAD مع Transformers و Nlp | [Suraj Patil](https://github.com/patil-suraj) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/T5_on_TPU.ipynb#scrollTo=QLGiFCDqvuil) |
-| [Fine-tune T5 for Classification and Multiple Choice](https://github.com/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb)  | كيفية ضبط نموذج T5 للتصنيف والمهام متعددة الخيارات باستخدام تنسيق النص إلى نص مع PyTorch Lightning |  [Suraj Patil](https://github.com/patil-suraj) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb) |
-| [Fine-tune DialoGPT on New Datasets and Languages](https://github.com/ncoop57/i-am-a-nerd/blob/master/_notebooks/2020-05-12-chatbot-part-1.ipynb)  | كيفية ضبط نموذج DialoGPT على مجموعة بيانات جديدة لروبوتات الدردشة المحادثية المفتوحة |  [Nathan Cooper](https://github.com/ncoop57) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ncoop57/i-am-a-nerd/blob/master/_notebooks/2020-05-12-chatbot-part-1.ipynb) |
-| [Long Sequence Modeling with Reformer](https://github.com/patrickvonplaten/notebooks/blob/master/PyTorch_Reformer.ipynb)  | كيفية التدريب على تسلسلات طويلة تصل إلى 500,000 رمز باستخدام Reformer |  [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/PyTorch_Reformer.ipynb)  |
-| [Fine-tune BART for Summarization](https://github.com/ohmeow/ohmeow_website/blob/master/posts/2021-05-25-mbart-sequence-classification-with-blurr.ipynb) | كيفية ضبط نموذج BART للتلخيص باستخدام fastai باستخدام blurr | [Wayde Gilliam](https://ohmeow.com/) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ohmeow/ohmeow_website/blob/master/posts/2021-05-25-mbart-sequence-classification-with-blurr.ipynb) |
-| [Fine-tune a pre-trained Transformer on anyone's tweets](https://colab.research.google.com/github/borisdayma/huggingtweets/blob/master/huggingtweets-demo.ipynb) | كيفية توليد تغريدات على غرار حساب Twitter المفضل لديك من خلال ضبط نموذج GPT-2 |  [Boris Dayma](https://github.com/borisdayma) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/borisdayma/huggingtweets/blob/master/huggingtweets-demo.ipynb) |
-| [Optimize 🤗 Hugging Face models with Weights & Biases](https://colab.research.google.com/github/wandb/examples/blob/master/colabs/huggingface/Optimize_Hugging_Face_models_with_Weights_%26_Biases.ipynb) | دليل كامل لعرض تكامل W&B مع Hugging Face | [Boris Dayma](https://github.com/borisdayma) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/examples/blob/master/colabs/huggingface/Optimize_Hugging_Face_models_with_Weights_%26_Biases.ipynb) |
-| [Pretrain Longformer](https://github.com/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb)  | كيفية بناء نسخة "طويلة" من النماذج المسبقة التدريب الموجودة |  [Iz Beltagy](https://beltagy.net) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/allenai/longformer/blob/master/scripts/convert_model_to_long.ipynb) |
-| [Fine-tune Longformer for QA](https://github.com/patil-suraj/Notebooks/blob/master/longformer_qa_training.ipynb) | كيفية ضبط نموذج Longformer لمهمة QA | [Suraj Patil](https://github.com/patil-suraj) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/Notebooks/blob/master/longformer_qa_training.ipynb) |
-| [Evaluate Model with 🤗nlp](https://github.com/patrickvonplaten/notebooks/blob/master/How_to_evaluate_Longformer_on_TriviaQA_using_NLP.ipynb) | كيفية تقييم نموذج Longformer على TriviaQA مع `nlp` | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1m7eTGlPmLRgoPkkA7rkhQdZ9ydpmsdLE?usp=sharing) |
-| [Fine-tune T5 for Sentiment Span Extraction](https://github.com/enzoampil/t5-intro/blob/master/t5_qa_training_pytorch_span_extraction.ipynb)  | كيفية ضبط نموذج T5 لاستخراج المشاعر باستخدام تنسيق النص إلى نص مع PyTorch Lightning |  [Lorenzo Ampil](https://github.com/enzoampil) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/enzoampil/t5-intro/blob/master/t5_qa_training_pytorch_span_extraction.ipynb) |
-| [Fine-tune DistilBert for Multiclass Classification](https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_multiclass_classification.ipynb) | كيفية ضبط نموذج DistilBert للتصنيف متعدد الفئات باستخدام PyTorch | [Abhishek Kumar Mishra](https://github.com/abhimishra91) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_multiclass_classification.ipynb)|
-|[Fine-tune BERT for Multi-label Classification](https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_multi_label_classification.ipynb)|كيفية ضبط نموذج BERT للتصنيف متعدد التصنيفات باستخدام PyTorch|[Abhishek Kumar Mishra](https://github.com/abhimishra91) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_multi_label_classification.ipynb)|
-|[Fine-tune T5 for Summarization](https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb)|كيفية ضبط نموذج T5 للتلخيص في PyTorch وتتبع التجارب باستخدام WandB|[Abhishek Kumar Mishra](https://github.com/abhimishra91) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb)|
-|[Speed up Fine-Tuning in Transformers with Dynamic Padding / Bucketing](https://github.com/ELS-RD/transformers-notebook/blob/master/Divide_Hugging_Face_Transformers_training_time_by_2_or_more.ipynb)|كيفية تسريع الضبط الدقيق بعامل 2 باستخدام الضبط الديناميكي/التقسيم|[Michael Benesty](https://github.com/pommedeterresautee) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CBfRU1zbfu7-ijiOqAAQUA-RJaxfcJoO?usp=sharing)|
-|[Pretrain Reformer for Masked Language Modeling](https://github.com/patrickvonplaten/notebooks/blob/master/Reformer_For_Masked_LM.ipynb)| كيفية تدريب نموذج Reformer مع طبقات الانتباه ثنائية الاتجاه | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tzzh0i8PgDQGV3SMFUGxM7_gGae3K-uW?usp=sharing)|
-|[Expand and Fine Tune Sci-BERT](https://github.com/lordtt13/word-embeddings/blob/master/COVID-19%20Research%20Data/COVID-SciBERT.ipynb)| كيفية زيادة مفردات نموذج SciBERT المسبق التدريب من AllenAI على مجموعة بيانات CORD وإنشاء خط أنابيب لها. | [Tanmay Thakur](https://github.com/lordtt13) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1rqAR40goxbAfez1xvF3hBJphSCsvXmh8)|
-|[Fine Tune BlenderBotSmall for Summarization using the Trainer API](https://github.com/lordtt13/transformers-experiments/blob/master/Custom%20Tasks/fine-tune-blenderbot_small-for-summarization.ipynb)| كيفية ضبط نموذج BlenderBotSmall للتلخيص على مجموعة بيانات مخصصة، باستخدام واجهة برمجة التطبيقات Trainer. | [Tanmay Thakur](https://github.com/lordtt13) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/19Wmupuls7mykSGyRN_Qo6lPQhgp56ymq?usp=sharing)|
-|[Fine-tune Electra and interpret with Integrated Gradients](https://github.com/elsanns/xai-nlp-notebooks/blob/master/electra_fine_tune_interpret_captum_ig.ipynb) | كيفية ضبط نموذج Electra للتحليل العاطفي وتفسير التنبؤات باستخدام Captum Integrated Gradients | [Eliza Szczechla](https://elsanns.github.io) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elsanns/xai-nlp-notebooks/blob/master/electra_fine_tune_interpret_captum_ig.ipynb)|
-|[fine-tune a non-English GPT-2 Model with Trainer class](https://github.com/philschmid/fine-tune-GPT-2/blob/master/Fine_tune_a_non_English_GPT_2_Model_with_Huggingface.ipynb) | كيفية ضبط نموذج GPT-2 غير الإنجليزي باستخدام فئة Trainer | [Philipp Schmid](https://www.philschmid.de) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/philschmid/fine-tune-GPT-2/blob/master/Fine_tune_a_non_English_GPT_2_Model_with_Huggingface.ipynb)|
-|[Fine-tune a DistilBERT Model for Multi Label Classification task](https://github.com/DhavalTaunk08/Transformers_scripts/blob/master/Transformers_multilabel_distilbert.ipynb) | كيفية ضبط نموذج DistilBERT لمهمة التصنيف متعدد التصنيفات | [Dhaval Taunk](https://github.com/DhavalTaunk08) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DhavalTaunk08/Transformers_scripts/blob/master/Transformers_multilabel_distilbert.ipynb)|
-|[Fine-tune ALBERT for sentence-pair classification](https://github.com/NadirEM/nlp-notebooks/blob/master/Fine_tune_ALBERT_sentence_pair_classification.ipynb) | كيفية ضبط نموذج ALBERT أو أي نموذج آخر قائم على BERT لمهمة التصنيف المزدوج للجمل | [Nadir El Manouzi](https://github.com/NadirEM) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NadirEM/nlp-notebooks/blob/master/Fine_tune_ALBERT_sentence_pair_classification.ipynb)|
-|[Fine-tune Roberta for sentiment analysis](https://github.com/DhavalTaunk08/NLP_scripts/blob/master/sentiment_analysis_using_roberta.ipynb) | كيفية ضبط نموذج Roberta للتحليل العاطفي | [Dhaval Taunk](https://github.com/DhavalTaunk08) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DhavalTaunk08/NLP_scripts/blob/master/sentiment_analysis_using_roberta.ipynb)|
-|[Evaluating Question Generation Models](https://github.com/flexudy-pipe/qugeev) | ما مدى دقة الإجابات على الأسئلة التي يولدها نموذجك التحويلي seq2seq؟ | [Pascal Zoleko](https://github.com/zolekode) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1bpsSqCQU-iw_5nNoRm_crPq6FRuJthq_?usp=sharing)|
-|[Classify text with DistilBERT and Tensorflow](https://github.com/peterbayerle/huggingface_notebook/blob/main/distilbert_tf.ipynb) | كيفية ضبط نموذج DistilBERT للتصنيف النصي في TensorFlow | [Peter Bayerle](https://github.com/peterbayerle) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/peterbayerle/huggingface_notebook/blob/main/distilbert_tf.ipynb)|
-|[Leverage BERT for Encoder-Decoder Summarization on CNN/Dailymail](https://github.com/patrickvonplaten/notebooks/blob/master/BERT2BERT_for_CNN_Dailymail.ipynb) | كيفية البدء السريع لنموذج *EncoderDecoderModel* مع نقطة تفتيش *google-bert/bert-base-uncased* للتلخيص على CNN/Dailymail | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/BERT2BERT_for_CNN_Dailymail.ipynb)|
-|[Leverage RoBERTa for Encoder-Decoder Summarization on BBC XSum](https://github.com/patrickvonplaten/notebooks/blob/master/RoBERTaShared_for_BBC_XSum.ipynb) | كيفية البدء السريع لنموذج *EncoderDecoderModel* المشترك مع نقطة تفتيش *FacebookAI/roberta-base* للتلخيص على BBC/XSum | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/RoBERTaShared_for_BBC_XSum.ipynb)|
-|[Fine-tune TAPAS on Sequential Question Answering (SQA)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb) | كيفية ضبط نموذج *TapasForQuestionAnswering* مع نقطة تفتيش *tapas-base* على مجموعة بيانات Sequential Question Answering (SQA) | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Fine_tuning_TapasForQuestionAnswering_on_SQA.ipynb)|
-|[Evaluate TAPAS on Table Fact Checking (TabFact)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Evaluating_TAPAS_on_the_Tabfact_test_set.ipynb) | كيفية تقييم نموذج *TapasForSequenceClassification* المضبوط مسبقًا مع نقطة تفتيش *tapas-base-finetuned-tabfact* باستخدام مزيج من مكتبتي 🤗 datasets و 🤗 transformers | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/TAPAS/Evaluating_TAPAS_on_the_Tabfact_test_set.ipynb)|
-|[Fine-tuning mBART for translation](https://colab.research.google.com/github/vasudevgupta7/huggingface-tutorials/blob/main/translation_training.ipynb) | كيفية ضبط نموذج mBART باستخدام Seq2SeqTrainer للترجمة من الهندية إلى الإنجليزية | [Vasudev Gupta](https://github.com/vasudevgupta7) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vasudevgupta7/huggingface-tutorials/blob/main/translation_training.ipynb)|
-|[Fine-tune LayoutLM on FUNSD (a form understanding dataset)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForTokenClassification_on_FUNSD.ipynb) | كيفية ضبط نموذج *LayoutLMForTokenClassification* على مجموعة بيانات FUNSD لاستخراج المعلومات من المستندات الممسوحة ضوئيًا | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForTokenClassification_on_FUNSD.ipynb)|
-|[Fine-Tune DistilGPT2 and Generate Text](https://colab.research.google.com/github/tripathiaakash/DistilGPT2-Tutorial/blob/main/distilgpt2_fine_tuning.ipynb) | كيفية ضبط نموذج DistilGPT2 وتوليد النص | [Aakash Tripathi](https://github.com/tripathiaakash) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tripathiaakash/DistilGPT2-Tutorial/blob/main/distilgpt2_fine_tuning.ipynb)|
-|[Fine-Tune LED on up to 8K tokens](https://github.com/patrickvonplaten/notebooks/blob/master/Fine_tune_Longformer_Encoder_Decoder_(LED)_for_Summarization_on_pubmed.ipynb) | كيفية ضبط نموذج LED على pubmed للتلخيص طويل المدى | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Fine_tune_Longformer_Encoder_Decoder_(LED)_for_Summarization_on_pubmed.ipynb)|
-|[Evaluate LED on Arxiv](https://github.com/patrickvonplaten/notebooks/blob/master/LED_on_Arxiv.ipynb) | كيفية تقييم نموذج LED للتلخيص طويل المدى بشكل فعال | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/LED_on_Arxiv.ipynb)|
-|[Fine-tune LayoutLM on RVL-CDIP (a document image classification dataset)](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForSequenceClassification_on_RVL_CDIP.ipynb) | كيفية ضبط نموذج *LayoutLMForSequenceClassification* على مجموعة بيانات RVL-CDIP لتصنيف المستندات الممسوحة ضوئيًا | [Niels Rogge](https://github.com/nielsrogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/LayoutLM/Fine_tuning_LayoutLMForSequenceClassification_on_RVL_CDIP.ipynb)|
-|[Wav2Vec2 CTC decoding with GPT2 adjustment](https://github.com/voidful/huggingface_notebook/blob/main/xlsr_gpt.ipynb) | كيفية فك تشفير تسلسل CTC مع تعديل نموذج اللغة | [Eric Lam](https://github.com/voidful) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1e_zQHYbO2YKEaUgzb1ww1WwiAyydAj?usp=sharing)|
-|[Fine-tune BART for summarization in two languages with Trainer class](https://github.com/elsanns/xai-nlp-notebooks/blob/master/fine_tune_bart_summarization_two_langs.ipynb) | كيفية ضبط نموذج BART للتلخيص بلغتين باستخدام فئة Trainer | [Eliza Szczechla](https://github.com/elsanns) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elsanns/xai-nlp-notebooks/blob/master/fine_tune_bart_summarization_two_langs.ipynb)|
-|[Evaluate Big Bird on Trivia QA](https://github.com/patrickvonplaten/notebooks/blob/master/Evaluating_Big_Bird_on_TriviaQA.ipynb) | كيفية تقييم نموذج BigBird للأسئلة والأجوبة على وثائق طويلة على Trivia QA | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Evaluating_Big_Bird_on_TriviaQA.ipynb)|
-| [Create video captions using Wav2Vec2](https://github.com/Muennighoff/ytclipcc/blob/main/wav2vec_youtube_captions.ipynb) | كيفية إنشاء تعليقات توضيحية على YouTube من أي فيديو من خلال تفريغ الصوت باستخدام Wav2Vec | [Niklas Muennighoff](https://github.com/Muennighoff) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Muennighoff/ytclipcc/blob/main/wav2vec_youtube_captions.ipynb) |
-| [Fine-tune the Vision Transformer on CIFAR-10 using PyTorch Lightning](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_PyTorch_Lightning.ipynb) | كيفية ضبط نموذج Vision Transformer (ViT) على CIFAR-10 باستخدام مكتبات HuggingFace Transformers و Datasets و PyTorch Lightning | [Niels Rogge](https://github.com/nielsrogge) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_PyTorch_Lightning.ipynb) |
-| [Fine-tune the Vision Transformer on CIFAR-10 using the 🤗 Trainer](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_%F0%9F%A4%97_Trainer.ipynb) | كيفية ضبط نموذج Vision Transformer (ViT) على CIFAR-10 باستخدام مكتبات HuggingFace Transformers و Datasets و 🤗 Trainer | [Niels Rogge](https://github.com/nielsrogge) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_%F0%9F%A4%97_Trainer.ipynb) |
-| [Evaluate LUKE on Open Entity, an entity typing dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_open_entity.ipynb) | كيفية تقييم نموذج *LukeForEntityClassification* على مجموعة بيانات Open Entity | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_open_entity.ipynb) |
-| [Evaluate LUKE on TACRED, a relation extraction dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) | كيفية تقييم نموذج *LukeForEntityPairClassification* على مجموعة بيانات TACRED | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) |
-| [Evaluate LUKE on CoNLL-2003, an important NER benchmark](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) | كيفية تقييم نموذج *LukeForEntitySpanClassification* على مجموعة بيانات CoNLL-2003 | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) |
-| [Evaluate BigBird-Pegasus on PubMed dataset](https://github.com/vasudevgupta7/bigbird/blob/main/notebooks/bigbird_pegasus_evaluation.ipynb) | كيفية تقييم نموذج *BigBirdPegasusForConditionalGeneration* على مجموعة بيانات PubMed | [Vasudev Gupta](https://github.com/vasudevgupta7) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vasudevgupta7/bigbird/blob/main/notebooks/bigbird_pegasus_evaluation.ipynb) |
-| [Speech Emotion Classification with Wav2Vec2](https://github.com/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) | كيفية استخدام نموذج Wav2Vec2 المسبق التدريب لتصنيف المشاعر على مجموعة بيانات MEGA | [Mehrdad Farahani](https://github.com/m3hrdadfi) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/m3hrdadfi/soxan/blob/main/notebooks/Emotion_recognition_in_Greek_speech_using_Wav2Vec2.ipynb) |
-| [Detect objects in an image with DETR](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_minimal_example_(with_DetrFeatureExtractor).ipynb) | كيفية استخدام نموذج *DetrForObjectDetection* المدرب للكشف عن الأجسام في صورة وتصوير الانتباه | [Niels Rogge](https://github.com/NielsRogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/DETR_minimal_example_(with_DetrFeatureExtractor).ipynb) |
-| [Fine-tune DETR on a custom object detection dataset](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb) | كيفية ضبط نموذج *DetrForObjectDetection* على مجموعة بيانات الكشف عن الأجسام المخصصة | [Niels Rogge](https://github.com/NielsRogge) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/DETR/Fine_tuning_DetrForObjectDetection_on_custom_dataset_(balloon).ipynb) |
-| [Finetune T5 for Named Entity Recognition](https://github.com/ToluClassics/Notebooks/blob/main/T5_Ner_Finetuning.ipynb) | كيفية ضبط نموذج *T5* على مهمة التعرف على الكيانات المسماة | [Ogundepo Odunayo](https://github.com/ToluClassics) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1obr78FY_cBmWY5ODViCmzdY6O1KB65Vc?usp=sharing) |
-| [Fine-Tuning Open-Source LLM using QLoRA with MLflow and PEFT](https://github.com/mlflow/mlflow/blob/master/docs/source/llms/transformers/tutorials/fine-tuning/transformers-peft.ipynb) | كيفية استخدام [QLoRA](https://github.com/artidoro/qlora) و [PEFT](https://huggingface.co/docs/peft/en/index) لضبط نموذج LLM بطريقة فعالة من حيث الذاكرة، مع استخدام [MLflow](https://mlflow.org/docs/latest/llms/transformers/index.html) لإدارة تتبع التجارب | [Yuki Watanabe](https://github.com/B-Step62) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mlflow/mlflow/blob/master/docs/source/llms/transformers/tutorials/fine-tuning/transformers-peft.ipynb) |
--- a/docs/source/ar/how_to_hack_models.md
+++ b/docs/source/ar/how_to_hack_models.md
@ -1,163 +0,0 @@
-# كيفية تعديل أي نموذج من نماذج Transformers
-
-توفر مكتبة [🤗 Transformers](https://github.com/huggingface/transformers) مجموعة من النماذج المسبقة التدريب والأدوات لمعالجة اللغات الطبيعية، والرؤية، وما إلى ذلك. على الرغم من أن هذه النماذج تغطي مجموعة واسعة من التطبيقات، فقد تواجه حالات استخدام لا تدعمها المكتبة بشكل افتراضي. يُمكن للتخصيص أن يفتح إمكانيات جديدة، مثل إضافة طبقات جديدة، أو تعديل البنية المعمارية، أو تحسين آليات الانتباه. سيُوضح لك هذا الدليل كيفية تعديل نماذج Transformers الموجودة لتلبية احتياجاتك المحددة. الشيء الرائع هو أنك لست بحاجة إلى الخروج من إطار عمل Transformers لإجراء هذه التغييرات. ي يمكنك تعديل النماذج مباشرةً في Transformers والاستفادة من الميزات مثل [واجهة برمجة التطبيقات Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer)، و [PreTrainedModel](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel)، والضبط الدقيق الفعال باستخدام أدوات مثل [PEFT](https://huggingface.co/docs/peft/index).
-
-سنرشدك في هذا الدليل  لكيفية تخصيص نماذج Transformers الموجودة لتلبية متطلباتك، دون فقدان مزايا الإطار. ستتعلم كيفية:
-
- تعديل بنية نموذج ما من خلال تغيير آلية الانتباه الخاصة به.
- تطبيق تقنيات مثل Low-Rank Adaptation (LoRA) على مكونات نموذج محددة.
-
-نحن نشجعك على المساهمة باختراقاتك الخاصة ومشاركتها هنا مع المجتمع1
-
-## مثال: تعديل آلية الانتباه في نموذج Segment Anything (SAM)
-
-نموذج **Segment Anything (SAM)** هو نموذج رائد في مجال تجزئة الصور. في تنفيذه الافتراضي، يستخدم SAM إسقاطًا مجمعًا للاستعلام والمفتاح والقيمة (`qkv`) في آلية الانتباه الخاصة به. ومع ذلك، قد ترغب في ضبط مكونات محددة فقط من آلية الانتباه، مثل إسقاطات الاستعلام (`q`) والقيمة (`v`)، لتقليل عدد المعلمات القابلة للتدريب والموارد الحسابية المطلوبة.
-
-### الدافع
-
-من خلال تقسيم الإسقاط المجمع `qkv` إلى إسقاطات منفصلة `q` و `k` و `v`، يمكنك تطبيق تقنيات مثل **LoRA** (Low-Rank Adaptation) على إسقاطي `q` و `v` فقط. يسمح لك هذا بما يلي:
-
- ضبط عدد أقل من المعلمات، مما يقلل من العبء الحسابي.
- تحقيق أداء أفضل من خلال التركيز على مكونات محددة.
- تجربة استراتيجيات تعديل مختلفة في آلية الانتباه.
-
-### التنفيذ
-
-#### **الخطوة 1: إنشاء فئة اهتمام مخصصة**
-
-بعد ذلك، قم بإنشاء فئة فرعية من فئة `SamVisionAttention` الأصلية وعدلها لتضم إسقاطات `q` و `k` و `v` منفصلة.
-
-```python
-import torch
-import torch.nn as nn
-from transformers.models.sam.modeling_sam import SamVisionAttention
-
-class SamVisionAttentionSplit(SamVisionAttention, nn.Module):
-    def __init__(self, config, window_size):
-        super().__init__(config, window_size)
-        del self.qkv
-        # إسقاطات منفصلة q و k و v
-        self.q = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
-        self.k = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
-        self.v = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias)
-        self._register_load_state_dict_pre_hook(self.split_q_k_v_load_hook)
-
-    def split_q_k_v_load_hook(self, state_dict, prefix, *args):
-        keys_to_delete = []
-        for key in list(state_dict.keys()):
-            if "qkv." in key:
-                # تقسيم q و k و v من الإسقاط المجمع
-                q, k, v = state_dict[key].chunk(3, dim=0)
-                # استبدال الإسقاطات الفردية q و k و v
-                state_dict[key.replace("qkv.", "q.")] = q
-                state_dict[key.replace("qkv.", "k.")] = k
-                state_dict[key.replace("qkv.", "v.")] = v
-                # وضع علامة على مفتاح qkv القديم للحذف
-                keys_to_delete.append(key)
-        
-        # حذف مفاتيح qkv القديمة
-        for key in keys_to_delete:
-            del state_dict[key]
-
-    def forward(self, hidden_states: torch.Tensor, output_attentions=False) -> torch.Tensor:
-        batch_size, height, width, _ = hidden_states.shape
-        qkv_shapes = (batch_size *  self.num_attention_heads,  height * width, -1)
-        query = self.q(hidden_states).reshape((batch_size,  height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
-        key = self.k(hidden_states).reshape((batch_size,  height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
-        value = self.v(hidden_states).reshape((batch_size,  height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes)
-
-        attn_weights = (query * self.scale) @ key.transpose(-2, -1)
-
-        if self.use_rel_pos:
-            attn_weights = self.add_decomposed_rel_pos(
-                attn_weights, query, self.rel_pos_h, self.rel_pos_w, (height, width), (height, width)
-            )
-
-        attn_weights = torch.nn.functional.softmax(attn_weights, dtype=torch.float32, dim=-1).to(query.dtype)
-        attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
-        attn_output = (attn_probs @ value).reshape(batch_size, self.num_attention_heads, height, width, -1)
-        attn_output = attn_output.permute(0, 2, 3, 1, 4).reshape(batch_size, height, width, -1)
-        attn_output = self.proj(attn_output)
-
-        if output_attentions:
-            outputs = (attn_output, attn_weights)
-        else:
-            outputs = (attn_output, None)
-        return outputs
-```
-
-**الشرح:**
-
- **الإسقاطات المنفصلة:** يتم إزالة الإسقاط المُجمع `qkv`، وإنشاء إسقاطات خطية منفصلة `q` و `k` و `v`.
- **دالة استدعاء  تحميل الأوزان:** تقوم طريقة `_split_qkv_load_hook` بتقسيم أوزان `qkv` المسبقة التدريب إلى أوزان `q` و `k` و `v` منفصلة عند تحميل النموذج. يضمن هذا التوافق مع أي نموذج مسبق التدريب.
- **التنفيذ الأمامي:** يتم حساب الاستعلامات والمفاتيح والقيم بشكل منفصل، وتستمر آلية الانتباه كالمعتاد.
-
-#### **الخطوة 2: استبدال فئة الانتباه الأصلية**
-
-استبدل فئة `SamVisionAttention` الأصلية بفئتك المخصصة بحيث يستخدم النموذج آلية الانتباه المعدلة.
-
-```python
-from transformers import SamModel
-from transformers.models.sam import modeling_sam
-
-# استبدال فئة الاهتمام في وحدة نمطية modeling_sam
-modeling_sam.SamVisionAttention = SamVisionAttentionSplit
-
-# تحميل نموذج SAM المسبق التدريب
-model = SamModel.from_pretrained("facebook/sam-vit-base")
-```
-
-**الشرح:**
-
- **استبدال الفئة:** من خلال تعيين فئتك المخصصة إلى `modeling_sam.SamVisionAttention`، فإن أي حالات من فئة `SamVisionAttention` في النموذج ستستخدم النسخة المعدلة. وبالتالي، عند استدعاء `SamModel`، سيتم استخدام `SamVisionAttentionSplit` المحددة حديثًا.
- **تحميل النموذج:** يتم تحميل النموذج باستخدام `from_pretrained`، ويتم دمج آلية الانتباه المخصصة.
-
-#### **الخطوة 3: تطبيق LoRA على إسقاطات محددة**
-
-مع وجود إسقاطات `q` و `k` و `v` منفصلة، يمكنك الآن تطبيق LoRA على مكونات محددة، مثل إسقاطات `q` و `v`.
-
-```python
-from peft import LoraConfig, get_peft_model
-
-config = LoraConfig(
-    r=16,
-    lora_alpha=32,
-    target_modules=["q", "v"],  # تطبيق LoRA على إسقاطات q و v
-    lora_dropout=0.1,
-    task_type="mask-generation"
-)
-
-# تطبيق LoRA على النموذج
-model = get_peft_model(model, config)
-```
-
-**الشرح:**
-
- **تكوين LoRA:** تحدد `LoraConfig` المرتبة `r`، وعامل القياس `lora_alpha`، والوحدات المستهدفة (`"q"` و `"v"`)، ومعدل التخلي، ونوع المهمة.
- **تطبيق LoRA:** تقوم دالة `get_peft_model` بتطبيق LoRA على الوحدات المحددة في النموذج.
- **تقليل المعلمات:** من خلال التركيز على `q` و `v`، فإنك تقلل عدد المعلمات القابلة للتدريب، مما يؤدي إلى تسريع التدريب وتقليل استخدام الذاكرة.
-
-#### **الخطوة 4: التحقق من عدد المعلمات القابلة للتدريب**
-
-من السهل التحقق من عدد المعلمات القابلة للتدريب ومعرفة تأثير تعديلك.
-
-```python
-model.print_trainable_parameters()
-```
-
-**الناتج المتوقع:**
-
-```
-عدد المعلمات القابلة للتدريب: 608,256 || جميع المعلمات: 94,343,728 || نسبة المعلمات القابلة للتدريب: 0.6447
-عدد المعلمات القابلة للتدريب: 912,384 || جميع المعلمات: 94,647,856 || نسبة المعلمات القابلة للتدريب: 0.9640 # مع k
-```
-
-## المساهمة بابداعاتك الخاصة
-
-يمكن لتعديل النماذج المسبقة التدريب أن يفتح آفاقًا جديدة للبحث والتطبيق. من خلال فهم وتعديل الآليات الداخلية للنماذج مثل SAM، يمكنك تخصيصها لتلبية احتياجاتك المحددة، وتحسين الأداء، وتجربة أفكار جديدة.
-
-إذا قمت بتطوير تعديﻻتك الخاصة لنماذج Transformers وترغب في مشاركتها، ففكر في المساهمة في هذه الوثيقة.
-
- **إنشاء طلب سحب (Pull Request):** شارك تغييراتك وتحسيناتك في التعليمات البرمجية مباشرة في المستودع.
- **كتابة التوثيق:** قدم تفسيرات وأمثلة واضحة لتعديلاتك.
- **التفاعل مع المجتمع:** ناقش أفكارك واحصل على تعليقات من المطورين والباحثين الآخرين من خلال فتح مشكلة.
--- a/docs/source/ar/modular_transformers.md
+++ b/docs/source/ar/modular_transformers.md
@ -1,184 +0,0 @@
-# المحولات النمطية
-
-مكتبة `transformers` هي إطار عمل ذو فلسفة محدد؛ يتم تعريف فلسفتنا في [الدليل المفاهيمي](./philosophy).
-
-جوهر هذه الفلسفة يتمثل في مبدأ [نموذج واحد، ملف واحد](https://huggingface.co/blog/transformers-design-philosophy)
-في المكتبة. الجانب السلبي لهذا المكون هو تقييده لوراثة واستيراد مكونات الملفات.
-
-نتيجة لذلك، تتكرر مكونات النموذج عبر العديد من الملفات. يحتوي `transformers` على عدد كبير من طبقات الانتباه، يقارب عدد النماذج، والكثير منها متطابق.  يتسبب هذا في تباعد عمليات التنفيذ المستقلة مع تطبيق الإصلاحات والتغييرات.
-على أجزاء محددة من التعليمات البرمجية.
-
-ولمعالجة ذلك، اعتمدنا مفهوم "النسخ" في المكتبة.  فبإضافة تعليق يُشير إلى أن التعليمات البرمجية هي نسخة من أخرى، نضمن من خلال أنظمة  CI والأوامر المحلية عدم تباعد النسخ.  لكن هذه العملية، رغم بساطتها، تُسبب إرهاقاً.  كما أنها تزيد العبء على المساهمين، وهو ما نهدف إلى تجاوزه.
-
-غالباً ما تتطلب مساهمات النماذج إضافة تعليمات برمجية (حوالي 1000 سطر)، ومعالج (حوالي 500 سطر)، واختبارات، ووثائق، إلخ. ونادراً ما تقل مساهمات النماذج عن 3000-5000 سطر من التعليمات البرمجية،  معظمها أكواد نمطية.  هذا يرفع مستوى  المساهمات،
-
-ونهدف مع المحولات النمطية إلى خفض هذا المستوى إلى حدّ مقبول.
-
-## ما هو؟
-
-تقدم المحولات النمطية مفهوم ملف "نمطي" لمجلد نموذج. يقبل هذا الملف النمطي تعليمات برمجية
-غير مقبولة عادة في ملفات النمذجة/المعالجة، حيث يسمح بالاستيراد من نماذج مجاورة وكذلك
-الوراثة من الفئات إلى فئات أخرى.
-
-يعرّف هذا الملف النمطي النماذج والمعالجات وفئة التكوين التي سيتم تعريفها في وحداتهم
-المتعلقة.
-
-وأخيرًا، يقدم هذا الميزة أداة `linter` جديدة والتي ستعمل على "تفكيك" الملف النمطي إلى بنية "نموذج واحد، ملف واحد"
-هيكل الدليل. سيتم إنشاء هذه الملفات تلقائيًا في كل مرة يتم فيها تشغيل البرنامج النصي؛ مما يقلل من المساهمات المطلوبة
-إلى الملف النمطي، وبالتالي فقط إلى التغييرات بين النموذج المساهم والنماذج الأخرى.
-
-سيقوم مستخدمو النموذج في النهاية باستيراد واستخدام واجهة الملف الواحد، لذا لا يتوقع حدوث أي تغيير هنا. من خلال القيام بذلك،
-نأمل في الجمع بين أفضل ما في العالمين: تمكين المساهمات البسيطة مع الالتزام بفلسفتنا.
-
-لذلك، هذا بديل لعلامات `# Copied from`، ويمكن توقع انتقال النماذج المساهمة سابقًا إلى
-تنسيق المحولات النمطية الجديد في الأشهر المقبلة.
-
-### التفاصيل
-
-تُبسط أداة "linter" الوراثة، مُنشئةً جميع الملفات المفردة من الملف النمطي، مع الحفاظ على شفافيتها أمام مستخدمي Python. حاليًا، تُبسط الأداة مستوىً واحدًا من الوراثة
-
-على سبيل المثال:
- إذا ورثت فئة التكوين من فئة أخرى وأضافت/حذفت معامل، فسيتم إما الإشارة إلى الملف المولد مباشرةً
-  (في حالة الإضافة) أو إزالته تمامًا (في حالة الحذف).
- إذا ورثت فئة من فئة أخرى، على سبيل المثال: `class GemmaModel(LlamaModel):`، تُستنتج التبعيات تلقائيًا
-  سيتم استنتاج جميع الوحدات الفرعية تلقائيًا من الفئة الأصلية.
- إذا قمت بتعريف وظائف جديدة في الملف `modular` واستخدمتها داخل الفئات، فستستنتج أداة linter ذلك تلقائيًا
-
-يجب أن تكون قادرًا على كتابة كل شيء (المجزىء اللغوي، ومُعالِج الصور، والنموذج، والتكوين) في الملف `modular`، وسيتم إنشاء الملفات المُقابلة تلقائيًا.
-
-### التطبيق
-
-[TODO] نقدم اختبارًا جديدًا، للتأكد من أن المحتوى المولد يتطابق مع ما هو موجود في `modular_xxxx.py`
-
-### الأمثلة
-
-هنا مثال سريع باستخدام BERT و RoBERTa. النموذجان مرتبطان ارتباطًا وثيقًا: يختلف تنفيذهما النموذجي في طبقة تضمين.
-
-بدلاً من إعادة تعريف النموذج بالكامل، إليك كيف يبدو ملف `modular_roberta.py` لفئات النمذجة والتكوين (لأغراض المثال، يتم تجاهل المجزىء اللغوي في هذا الوقت حيث أنه مختلف جدًا).
-
-```python
-from torch import nn
-from ..bert.configuration_bert import BertConfig
-from ..bert.modeling_bert import (
-    BertModel,
-    BertEmbeddings,
-    BertForMaskedLM
-)
-
-# تكوين RoBERTa مطابق لتكوين BERT
-class RobertaConfig(BertConfig):
-  model_type = 'roberta'
-
-# نعيد تعريف الإضافات هنا لتسليط الضوء على اختلاف معرف الحشو، ونعيد تعريف الإضافات الموضعية
-class RobertaEmbeddings(BertEmbeddings):
-    def __init__(self, config):
-        super().__init__(config())
-
-        self.padding_idx = config.pad_token_id
-        self.position_embeddings = nn.Embedding(
-            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
-        )
-
-# نموذج RoBERTa مطابق لنموذج BERT، باستثناء طبقة الإضافات.
-# نعيد تعريف الإضافات أعلاه، لذا هنا لا توجد حاجة لعمل إضافي
-class RobertaModel(BertModel):
-  def __init__(self, config):
-    super().__init__(config)
-    self.embeddings = RobertaEmbeddings(config)
-
-      
-# الرؤوس الآن تحتاج فقط إلى إعادة تعريف النموذج داخل `RobertaModel` الصحيح
-class RobertaForMaskedLM(BertForMaskedLM):
-  def __init__(self, config):
-    super().__init__(config)
-    self.model = RobertaModel(config)
-```
-
-لاحظ أنه إذا لم تستخدم الاعتماد الذي حددته، فستحصل على الخطأ التالي:
-
-```bash
-ValueError: You defined `RobertaEmbeddings` in the modular_roberta.py, it should be used
-                                    when you define `BertModel`, as it is one of it's direct dependencies. Make sure
-                                    you use it in the `__init__` function.
-```
-
-بالإضافة إلى ذلك، قد تجد قائمة بالأمثلة هنا:
-
-## ما هو ليس كذلك
-
-ليس بديلاً لتعليمات برمجة النمذجة (بعد؟)، وإذا لم يكن نموذجك يعتمد على أي شيء آخر موجود من قبل، فيمكنك إضافة ملف `نمذجة` كالعادة.
-
-
-## الاستخدام المتقدم
-
-### إزالة السمات والوظائف
-لإزالة السمات التي لا تستخدم في نموذجك النمطي، والتي لا تريد رؤيتها في النمذجة المفككة:
-
-```python
-class GemmaModel(LlamaModel):                 |           class GemmaModel(PreTrainedModel):
-    def __init__(self, config):               |              def __init__(self, config):
-        super().__init__(self, eos_token)     |                 super().__init__(config)
-        del self.embed_tokens                 |                 self.padding_idx = config.pad_token_id
-                                              |                 self.vocab_size = config.vocab_size
-                                              |
-                                              |                 self.layers = nn.ModuleList(
-                                              |                     [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
-                                              |                 )
-                                              |                 self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-                                              |                 self.rotary_emb = LlamaRotaryEmbedding(config=config)
-                                              |                 self.gradient_checkpointing = False
-                                              |                 
-                                              |                 # Initialize weights and apply final processing
-                                              |                 self.post_init()
-```
-إذا قمت بالتحقق من `LlamaModel` الأصلي، فستجد `embed_tokens` الذي تمت إزالته هنا (كما هو متوقع!)
-
-إزالة وظيفة مشابهة، تحتاج فقط إلى كتابتها مع `raise ValueError("")` لمحاكاة السلوك الذي تريده فعليًا عند إزالة وظيفة أصلية في بايثون.
-
-```python
-class GemmaTokenizer(LlamaTokenizer):
-    ...
-
-    def get_spm_processor(self):
-        raise AttributeError("Not needed for Gemma")
-
-    def unk_token_length(self):
-        raise AttributeError("Not needed for Gemma")
-```
-
-### تعريف وظائف جديدة
-
-إذا قمت بتعريف وظيفة جديدة في الملف `modular` لاستخدامها داخل فئة، على سبيل المثال
-
-```python
-def my_new_function(*args, **kwargs):
-  # Do something here
-  pass
-
-class GemmaModel(LlamaModel):
-    def forward(*args, **kwargs):
-      # Call the function
-      example = my_new_function(*args, **kwargs)
-      # continue here
-```
-
-سيتم نسخ وظيفة `my_new_function` (وبشكل متكرر، أي وظائف أخرى جديدة يتم استدعاؤها في جسمها) تلقائيًا
-في الملف الذي يتم استخدامه.
-
-### استدعاء `super()`
-قمنا مؤخرًا بشحن بعض الميزات التي تسمح لك بالانتقال من:
-```python
-class GemmaTokenizer(LlamaTokenizer, PretrainedTokenizerFast):         |           class GemmaModel(nn.Module):
-    def __init__(self, eos_token="</s>"):                              |             def __init__(self):
-        eos_token = AddedToken(eos_token)                              |                eos_token = AddedToken(eos_token)
-        PretrainedTokenizerFast.__init__(self, eos_token)              |                super().__init__(eos_token)
-```
-هذا مفيد عندما لا تريد تفكيك استدعاء `super()`، وتريد التمييز بين أي استدعاء super init تقوم به!
-
-### التسمية الخاصة
-ندعم الآن أيضًا حالات خاصة مثل
-```python
-class GemmaVisionModel(CLIPModel):                                 
-    pass
-```
-حيث اسم فئة `GemmaVision` الخاصة بك ليس هو نفسه `Gemma` النمطي. هذا مفيد للغاية للنماذج المركبة.
--- a/docs/source/ar/notebooks.md
+++ b/docs/source/ar/notebooks.md
@ -130,6 +130,7 @@
 | دفتر الملاحظات     |      الوصف      |   |   |
 |:----------|:-------------|:-------------|------:|
 | [كيفية تكميم نموذج باستخدام ONNX Runtime لتصنيف النص](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)| يوضح كيفية تطبيق التكميم الثابت والديناميكي على نموذج باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime) لأي مهمة GLUE. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)|
+| [كيفية تكميم نموذج باستخدام Intel Neural Compressor لتصنيف النص](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)| يوضح كيفية تطبيق التكميم الثابت والديناميكي والتدريبي على نموذج باستخدام [Intel Neural Compressor (INC)](https://github.com/intel/neural-compressor) لأي مهمة GLUE. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)|
 | [كيفية ضبط نموذج بدقة على تصنيف النص باستخدام ONNX Runtime](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج بدقة على أي مهمة GLUE باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)|
 | [كيفية ضبط نموذج بدقة على التلخيص باستخدام ONNX Runtime](https://github.com/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج بدقة على XSUM باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)|

--- a/docs/source/ar/quicktour.md
+++ b/docs/source/ar/quicktour.md
@ -347,8 +347,8 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
 ```py
 >>> from transformers import AutoModel

->>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
->>> pt_model = AutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
+>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
+>>> pt_model = AutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
 ```
 </pt>
 <tf>
@ -356,8 +356,8 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
 ```py
 >>> from transformers import TFAutoModel

->>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
->>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
+>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
+>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
 ```
 </tf>
 </frameworkcontent>
--- a/docs/source/ar/tasks/language_modeling.md
+++ b/docs/source/ar/tasks/language_modeling.md
@ -1,422 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-->
-
-# نمذجة اللغة السببية (Causal language modeling)
-
-[[open-in-colab]]
-
-هناك نوعان من نمذجة اللغة، السببية والمقنعة. يوضح هذا الدليل نمذجة اللغة السببية.
-تُستخدم نماذج اللغة السببية غالبًا لتوليد النص. يمكنك استخدام هذه النماذج للتطبيقات الإبداعية مثل
-اختيار مغامرة النص الخاصة بك أو مساعد ترميز ذكي مثل Copilot أو CodeParrot.
-
-<Youtube id="Vpjb1lu0MDk"/>
-
-تتنبأ نمذجة اللغة السببية بالرمز التالي في تسلسل من الرموز، ولا يمكن للنموذج سوى الاهتمام بالرموز على
-اليسار. هذا يعني أن النموذج لا يمكنه رؤية الرموز المستقبلية. GPT-2 هو مثال على نموذج اللغة السببية.
-
-سيوضح لك هذا الدليل كيفية:
-
-1. ضبط دقيق [DistilRoBERTa](https://huggingface.co/distilbert/distilroberta-base) على مجموعة فرعية [r/askscience](https://www.reddit.com/r/askscience/) من مجموعة بيانات [ELI5](https://huggingface.co/datasets/eli5).
-2.  استخدام النموذج المدرب الخاص بك للاستنتاج.
-
-<Tip>
-
-لرؤية جميع العمارات ونقاط التحقق المتوافقة مع هذه المهمة، نوصي بالتحقق من [task-page](https://huggingface.co/tasks/text-generation)
-
-</Tip>
-
-قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate
-```
-
-نحن نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عند المطالبة، أدخل رمزك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات ELI5
-
-ابدأ بتحميل أول 5000 مثال من [ELI5-Category](https://huggingface.co/datasets/eli5_category) مجموعة البيانات مع مكتبة 🤗 Datasets. سيعطيك هذا فرصة للتجربة والتأكد من أن كل شيء يعمل قبل قضاء المزيد من الوقت في التدريب على مجموعة البيانات الكاملة.
-
-```py
->>> from datasets import load_dataset
-
->>> eli5 = load_dataset("eli5_category", split="train[:5000]")
-```
-
-قم بتقسيم مجموعة بيانات `train` إلى مجموعتي تدريب واختبار باستخدام الخاصية [`~datasets.Dataset.train_test_split`]:
-
-```py
->>> eli5 = eli5.train_test_split(test_size=0.2)
-```
-
-ثم ألق نظرة على مثال:
-
-```py
->>> eli5["train"][0]
-{'q_id': '7h191n',
- 'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
- 'selftext': '',
- 'category': 'Economics',
- 'subreddit': 'explainlikeimfive',
- 'answers': {'a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
-  'text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
-   'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
-   'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
-   'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
-  'score': [21, 19, 5, 3],
-  'text_urls': [[],
-   [],
-   [],
-   ['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']]},
- 'title_urls': ['url'],
- 'selftext_urls': ['url']}
-```
-
-على الرغم من أن هذا قد يبدو معقدًا، إلا أنك مهتم حقًا بحقل `text`. ما هو رائع حول مهام نمذجة اللغة
-أنت لا تحتاج إلى تسميات (تُعرف أيضًا باسم المهمة غير الخاضعة للإشراف) لأن الكلمة التالية تعمل كتسمية.
-
-## معالجة مسبقة (Preprocess)
-
-<Youtube id="ma1TrR7gE7I"/>
-
-الخطوة التالية هي تحميل مجزء النص DistilGPT2 لمعالجة حقل `text` الفرعي:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
-```
-
-ستلاحظ من المثال أعلاه، الحقل `text` هو في الواقع متداخل داخل `answers`. هذا يعني أنك ستحتاج إلى
-استخراج حقل `text` الفرعي من بنيته المتداخلة باستخدام الدالة [`flatten`](https://huggingface.co/docs/datasets/process#flatten):
-
-```py
->>> eli5 = eli5.flatten()
->>> eli5["train"][0]
-{'q_id': '7h191n',
- 'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
- 'selftext': '',
- 'category': 'Economics',
- 'subreddit': 'explainlikeimfive',
- 'answers.a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
- 'answers.text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
-  'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
-  'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
-  'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
- 'answers.score': [21, 19, 5, 3],
- 'answers.text_urls': [[],
-  [],
-  [],
-  ['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']],
- 'title_urls': ['url'],
- 'selftext_urls': ['url']}
-```
-
-كل حقل فرعي هو الآن عموداً منفصلاً مسبوقاً بـ `answers`، وحقل `text` هو قائمة الآن. بدلاً من ذلك
-من تجزائة نص كل جملة بشكل منفصل، قم بتحويل القائمة إلى سلسلة حتى تتمكن من تجزئة نصها بشكل مجمّع.
-
-هنا أول دالة معالجة مسبقة لدمج قائمة السلاسل لكل مثال ومجزىء النتيجة:
-
-```py
->>> def preprocess_function(examples):
-...     return tokenizer([" ".join(x) for x in examples["answers.text"]])
-```
-
-لتطبيق دالة المعالجة المسبقة هذه على مجموعة البيانات بأكملها، استخدم الدالة 🤗 Datasets [`~datasets.Dataset.map`]. يمكنك تسريع  هذه العملية `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد، وزيادة عدد العمليات مع `num_proc`. احذف أي أعمدة لا تحتاجها:
-
-```py
->>> tokenized_eli5 = eli5.map(
-...     preprocess_function,
-...     batched=True,
-...     num_proc=4,
-...     remove_columns=eli5["train"].column_names,
-... )
-```
-
-تحتوي هذه المجموعة من البيانات على تسلسلات الرموز، ولكن بعضها أطول من الطول الأقصى للمدخلات للنموذج.
-
-يمكنك الآن استخدام دالة ما قبل المعالجة ثانية لـ:
-
- تجميع كل التسلسلات.
- تقسيم التسلسلات المجمّعة إلى أجزاء أقصر محددة، بحجم `block_size`، والتي يجب أن تكون أقصر من الطول الأقصى للمدخلات ومناسبة لذاكرة GPU.
-
-```py
->>> block_size = 128
-
->>> def group_texts(examples):
-...     # ربط جميع النصوص.
-...     concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
-...     total_length = len(concatenated_examples[list(examples.keys())[0]])
-...     # نتجاهل الباقي الصغير، يمكننا إضافة الحشو إذا كان النموذج يدعمه بدلاً من هذا الإسقاط، يمكنك
-...     # تخصيص هذا الجزء حسب احتياجاتك.
-...     if total_length >= block_size:
-...         total_length = (total_length // block_size) * block_size
-...     # التقسيم إلى أجزاء بحجم block_size.
-...     result = {
-...         k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
-...         for k, t in concatenated_examples.items()
-...     }
-...     result["labels"] = result["input_ids"].copy()
-...     return result
-```
-
-طبق دالة `group_texts` على كامل المجموعة من البيانات:
-
-```py
->>> lm_dataset = tokenized_eli5.map(group_texts, batched=True, num_proc=4)
-```
-
-الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorForLanguageModeling`]. من الأفضل أن تقوم بـ *الحشو الديناميكي* للجمل إلى الطول الأطول في الدفعة أثناء التجميع، بدلاً من حشو كامل المجموعة من البيانات إلى الطول الأقصى.
-
-<frameworkcontent>
-<pt>
-استخدم رمز نهاية التسلسل كرمز للحشو، وحدد `mlm_probability` لحجب الرموز بشكل عشوائي عند كل تكرار للبيانات:
-
-```py
->>> from transformers import DataCollatorForLanguageModeling
-
->>> tokenizer.pad_token = tokenizer.eos_token
->>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
-```
-
-</pt>
-<tf>
-استخدم رمز نهاية التسلسل كرمز للحشو، وحدد `mlm_probability` لحجب الرموز بشكل عشوائي عند كل تكرار للبيانات:
-
-```py
->>> from transformers import DataCollatorForLanguageModeling
-
->>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False, return_tensors="tf")
-```
-
-</tf>
-</frameworkcontent>
-
-## التدريب (Train)
-
-<frameworkcontent>
-<pt>
-
-<Tip>
-
-إذا لم تكن على دراية بتدريب نموذج باستخدام [`Trainer`], اطلع على [البرنامج التعليمي الأساسي](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت جاهز الآن لبدء تدريب نموذجك! قم بتحميل DistilGPT2 باستخدام [`AutoModelForCausalLM`]:
-
-```py
->>> from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
-
->>> model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
-```
-
-في هذه المرحلة، تبقى ثلاث خطوات فقط:
-
-1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعامل الوحيد المطلوب هو `output_dir` الذي يحدد أين سيتم حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub بتحديد `push_to_hub=True` (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك).
-2. قم بتمرير معاملات التدريب إلى [`Trainer`] إلى جانب النموذج، والمجموعات من البيانات، ومجمّع البيانات.
-3. قم باستدعاء [`~Trainer.train`] لتدريب نموذجك.
-
-```py
->>> training_args = TrainingArguments(
-...     output_dir="my_awesome_eli5_clm-model",
-...     eval_strategy="epoch",
-...     learning_rate=2e-5,
-...     weight_decay=0.01,
-...     push_to_hub=True,
-... )
-
->>> trainer = Trainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=lm_dataset["train"],
-...     eval_dataset=lm_dataset["test"],
-...     data_collator=data_collator,
-...     tokenizer=tokenizer,
-... )
-
->>> trainer.train()
-```
-
-بمجرد اكتمال التدريب، استخدم طريقة [`~transformers.Trainer.evaluate`] لتقييم نموذجك والحصول على احتمالية الارتباك:
-
-```py
->>> import math
-
->>> eval_results = trainer.evaluate()
->>> print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")
-Perplexity: 49.61
-```
-
-ثم شارك نموذجك على Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
-<Tip>
-
-إذا لم تكن على دراية بتدريب نموذج باستخدام Keras، اطلع على [البرنامج التعليمي الأساسي](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-لتدريب نموذج في TensorFlow، ابدأ بإعداد دالة المحسن، وجدول معدل التعلم، وبعض معاملات التدريب:
-
-```py
->>> from transformers import create_optimizer, AdamWeightDecay
-
->>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
-```
-
-ثم يمكنك تحميل DistilGPT2 باستخدام [`TFAutoModelForCausalLM`]:
-
-```py
->>> from transformers import TFAutoModelForCausalLM
-
->>> model = TFAutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
-```
-
-حول مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> tf_train_set = model.prepare_tf_dataset(
-...     lm_dataset["train"],
-...     shuffle=True,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-
->>> tf_test_set = model.prepare_tf_dataset(
-...     lm_dataset["test"],
-...     shuffle=False,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-```
-
-قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers لديها دالة خسارة ذات صلة بالمهمة الافتراضية، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
-
-```py
->>> import tensorflow as tf
-
->>> model.compile(optimizer=optimizer)  # لا يوجد حجة للخسارة!
-```
-
-يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومجمّع البيانات في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> callback = PushToHubCallback(
-...     output_dir="my_awesome_eli5_clm-model",
-...     tokenizer=tokenizer,
-... )
-```
-
-أخيراً، أنت جاهز لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة، وعدد العصور، والتعليقات الخاصة بك لتدريب النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=[callback])
-```
-
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-<Tip>
-
-للحصول على مثال أكثر تعمقًا حول كيفية تدريب نموذج للنمذجة اللغوية السببية، اطلع على الدفتر المقابل
-[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)
-أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling-tf.ipynb).
-
-</Tip>
-
-## الاستدلال (Inference)
-
-رائع، الآن بعد أن قمت بتدريب نموذج، يمكنك استخدامه للاستدلال!
-
-قم بابتكار سؤال تود توليد نص منه:
-
-```py
->>> prompt = "Somatic hypermutation allows the immune system to"
-```
-
-أبسط طريقة لتجربة نموذجك المدرب للاستدلال هي استخدامه في [`pipeline`]. قم بتنفيذ `pipeline` لتوليد النص مع نموذجك، ومرر نصك إليه:
-
-```py
->>> from transformers import pipeline
-
->>> generator = pipeline("text-generation", model="username/my_awesome_eli5_clm-model")
->>> generator(prompt)
-[{'generated_text': "Somatic hypermutation allows the immune system to be able to effectively reverse the damage caused by an infection.\n\n\nThe damage caused by an infection is caused by the immune system's ability to perform its own self-correcting tasks."}]
-```
-
-<frameworkcontent>
-<pt>
-قسم النص وإرجع `input_ids` كتنسورات PyTorch:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_clm-model")
->>> inputs = tokenizer(prompt, return_tensors="pt").input_ids
-```
-
-استخدم طريقة [`~generation.GenerationMixin.generate`] لتوليد النص.
-للمزيد من التفاصيل حول استراتيجيات توليد النص المختلفة والبارامترات للتحكم في التوليد، راجع صفحة [استراتيجيات توليد النص](../generation_strategies).
-
-```py
->>> from transformers import AutoModelForCausalLM
-
->>> model = AutoModelForCausalLM.from_pretrained("username/my_awesome_eli5_clm-model")
->>> outputs = model.generate(inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
-```
-
-فك ترميز الرموز المولدة مرة أخرى إلى نص:
-
-```py
->>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-["Somatic hypermutation allows the immune system to react to drugs with the ability to adapt to a different environmental situation. In other words, a system of 'hypermutation' can help the immune system to adapt to a different environmental situation or in some cases even a single life. In contrast, researchers at the University of Massachusetts-Boston have found that 'hypermutation' is much stronger in mice than in humans but can be found in humans, and that it's not completely unknown to the immune system. A study on how the immune system"]
-```
-</pt>
-<tf>
-قم بتقسيم النص وإرجاع `input_ids` كـ TensorFlow tensors:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_clm-model")
->>> inputs = tokenizer(prompt, return_tensors="tf").input_ids
-```
-
-استخدم طريقة [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] لإنشاء الملخص. للمزيد من التفاصيل حول استراتيجيات توليد النص المختلفة والبارامترات للتحكم في التوليد، راجع صفحة [استراتيجيات توليد النص](../generation_strategies).
-
-```py
->>> from transformers import TFAutoModelForCausalLM
-
->>> model = TFAutoModelForCausalLM.from_pretrained("username/my_awesome_eli5_clm-model")
->>> outputs = model.generate(input_ids=inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
-```
-
-فك ترميز  الرموز المولدة مرة أخرى إلى نص:
-
-```py
->>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Somatic hypermutation allows the immune system to detect the presence of other viruses as they become more prevalent. Therefore, researchers have identified a high proportion of human viruses. The proportion of virus-associated viruses in our study increases with age. Therefore, we propose a simple algorithm to detect the presence of these new viruses in our samples as a sign of improved immunity. A first study based on this algorithm, which will be published in Science on Friday, aims to show that this finding could translate into the development of a better vaccine that is more effective for']
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tasks/masked_language_modeling.md
+++ b/docs/source/ar/tasks/masked_language_modeling.md
@ -1,442 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-->
-
-# نمذجة اللغة المقنعة (Masked language modeling)
-
-[[open-in-colab]]
-
-<Youtube id="mqElG5QJWUg"/>
-
-تتنبأ نمذجة اللغة المقنعة برمز مقنع في تسلسل، ويمكن للنموذج الانتباه إلى الرموز بشكل ثنائي الاتجاه. هذا
-يعني أن النموذج لديه إمكانية الوصول الكاملة إلى الرموز الموجودة على اليسار واليمين. تعد نمذجة اللغة المقنعة ممتازة للمهام التي
-تتطلب فهمًا سياقيًا جيدًا لتسلسل كامل. BERT هو مثال على نموذج لغة مقنع.
-
-سيوضح لك هذا الدليل كيفية:
-
-1. تكييف [DistilRoBERTa](https://huggingface.co/distilbert/distilroberta-base) على مجموعة فرعية [r/askscience](https://www.reddit.com/r/askscience/) من مجموعة بيانات [ELI5](https://huggingface.co/datasets/eli5).
-2. استخدام نموذج المدرب الخاص بك للاستدلال.
-
-<Tip>
-
-لمعرفة جميع البنى والنسخ المتوافقة مع هذه المهمة، نوصي بالتحقق من [صفحة المهمة](https://huggingface.co/tasks/fill-mask)
-
-</Tip>
-
-قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate
-```
-
-نحن نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عندما تتم مطالبتك، أدخل رمزك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات ELI5
-
-ابدأ بتحميل أول 5000 مثال من مجموعة بيانات [ELI5-Category](https://huggingface.co/datasets/eli5_category) باستخدام مكتبة 🤗 Datasets. سيعطيك هذا فرصة للتجربة والتأكد من أن كل شيء يعمل قبل قضاء المزيد من الوقت في التدريب على مجموعة البيانات الكاملة.
-
-```py
->>> from datasets import load_dataset
-
->>> eli5 = load_dataset("eli5_category", split="train[:5000]")
-```
-
-قم بتقسيم مجموعة البيانات `train` إلى مجموعتي تدريب واختبار باستخدام الدالة [`~datasets.Dataset.train_test_split`]:
-
-```py
->>> eli5 = eli5.train_test_split(test_size=0.2)
-```
-
-ثم ألق نظرة على مثال:
-
-```py
->>> eli5["train"][0]
-{'q_id': '7h191n',
- 'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
- 'selftext': '',
- 'category': 'Economics',
- 'subreddit': 'explainlikeimfive',
- 'answers': {'a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
-  'text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
-   'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
-   'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
-   'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
-  'score': [21, 19, 5, 3],
-  'text_urls': [[],
-   [],
-   [],
-   ['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']]},
- 'title_urls': ['url'],
- 'selftext_urls': ['url']}
-```
-
-على الرغم من أن هذا قد يبدو كثيرًا، إلا أنك مهتم حقًا بحقل `text`. ما هو رائع حول مهام نمذجة اللغة هو أنك لا تحتاج إلى تسميات (تُعرف أيضًا باسم المهمة غير الخاضعة للإشراف) لأن الكلمة التالية *هي* التسمية.
-
-## معالجة مسبقة (Preprocess)
-
-<Youtube id="8PmhEIXhBvI"/>
-
-بالنسبة لنمذجة اللغة المقنعة، فإن الخطوة التالية هي تحميل معالج DistilRoBERTa لمعالجة حقل `text` الفرعي:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilroberta-base")
-```
-
-ستلاحظ من المثال أعلاه، أن حقل `text` موجود بالفعل داخل `answers`. هذا يعني أنك ستحتاج إلى استخراج حقل `text` الفرعي من بنيته المضمنة باستخدام الدالة [`flatten`](https://huggingface.co/docs/datasets/process#flatten):
-
-```py
->>> eli5 = eli5.flatten()
->>> eli5["train"][0]
-{'q_id': '7h191n',
- 'title': 'What does the tax bill that was passed today mean? How will it affect Americans in each tax bracket?',
- 'selftext': '',
- 'category': 'Economics',
- 'subreddit': 'explainlikeimfive',
- 'answers.a_id': ['dqnds8l', 'dqnd1jl', 'dqng3i1', 'dqnku5x'],
- 'answers.text': ["The tax bill is 500 pages long and there were a lot of changes still going on right to the end. It's not just an adjustment to the income tax brackets, it's a whole bunch of changes. As such there is no good answer to your question. The big take aways are: - Big reduction in corporate income tax rate will make large companies very happy. - Pass through rate change will make certain styles of business (law firms, hedge funds) extremely happy - Income tax changes are moderate, and are set to expire (though it's the kind of thing that might just always get re-applied without being made permanent) - People in high tax states (California, New York) lose out, and many of them will end up with their taxes raised.",
-  'None yet. It has to be reconciled with a vastly different house bill and then passed again.',
-  'Also: does this apply to 2017 taxes? Or does it start with 2018 taxes?',
-  'This article explains both the House and senate bills, including the proposed changes to your income taxes based on your income level. URL_0'],
- 'answers.score': [21, 19, 5, 3],
- 'answers.text_urls': [[],
-  [],
-  [],
-  ['https://www.investopedia.com/news/trumps-tax-reform-what-can-be-done/']],
- 'title_urls': ['url'],
- 'selftext_urls': ['url']}
-```
-
-كل حقل فرعي هو الآن عمود منفصل كما هو موضح بواسطة بادئة `answers`، وحقل `text` هو قائمة الآن. بدلاً من
-معالجة كل جملة بشكل منفصل، قم بتحويل القائمة إلى سلسلة حتى تتمكن من معالجتها بشكل مشترك.
-
-هنا أول دالة معالجة مسبقة لربط قائمة السلاسل لكل مثال ومعالجة النتيجة:
-
-```py
->>> def preprocess_function(examples):
-...     return tokenizer([" ".join(x) for x in examples["answers.text"]])
-```
-
-لتطبيق دالة المعالجة المسبقة على مجموعة البيانات بأكملها، استخدم الدالة 🤗 Datasets [`~datasets.Dataset.map`]. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عدة عناصر في وقت واحد، وزيادة عدد العمليات باستخدام `num_proc`. احذف أي أعمدة غير ضرورية:
-
-```py
->>> tokenized_eli5 = eli5.map(
-...     preprocess_function,
-...     batched=True,
-...     num_proc=4,
-...     remove_columns=eli5["train"].column_names,
-... )
-```
-
-
-تحتوي مجموعة البيانات هذه على تسلسلات رمزية، ولكن بعضها أطول من الطول الأقصى للمدخلات للنموذج.
-
-يمكنك الآن استخدام دالة معالجة مسبقة ثانية لـ:
- تجميع جميع التسلسلات
- تقسيم التسلسلات المجمّعة إلى أجزاء أقصر محددة بـ `block_size`، والتي يجب أن تكون أقصر من الحد الأقصى لطول المدخلات ومناسبة لذاكرة GPU.
-
-```py
->>> block_size = 128
-
->>> def group_texts(examples):
-...     # تجميع جميع النصوص.
-...     concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
-...     total_length = len(concatenated_examples[list(examples.keys())[0]])
-...     # نتجاهل الجزء المتبقي الصغير، يمكننا إضافة الحشو إذا كان النموذج يدعمه بدلاً من هذا الإسقاط، يمكنك
-...     # تخصيص هذا الجزء حسب احتياجاتك.
-...     if total_length >= block_size:
-...         total_length = (total_length // block_size) * block_size
-...     # تقسيمها إلى أجزاء بحجم block_size.
-...     result = {
-...         k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
-...         for k, t in concatenated_examples.items()
-...     }
-...     return result
-```
-
-طبق دالة `group_texts` على مجموعة البيانات بأكملها:
-
-```py
->>> lm_dataset = tokenized_eli5.map(group_texts, batched=True, num_proc=4)
-```
-
-الآن، قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorForLanguageModeling`]. من الأكثر كفاءة أن تقوم بـ *الحشو الديناميكي* ليصل طولها إلى أطول جملة في الدفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الطول الأقصى.
-
-<frameworkcontent>
-<pt>
-
-استخدم رمز نهاية التسلسل كرمز الحشو وحدد `mlm_probability` لحجب الرموز عشوائياً كل مرة تكرر فيها البيانات:
-
-```py
->>> from transformers import DataCollatorForLanguageModeling
-
->>> tokenizer.pad_token = tokenizer.eos_token
->>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)
-```
-</pt>
-<tf>
-
-استخدم رمز نهاية التسلسل كرمز الحشو وحدد `mlm_probability` لحجب الرموز عشوائياً كل مرة تكرر فيها البيانات:
-
-```py
->>> from transformers import DataCollatorForLanguageModeling
-
->>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15, return_tensors="tf")
-```
-</tf>
-</frameworkcontent>
-
-## التدريب (Train)
-
-<frameworkcontent>
-<pt>
-
-<Tip>
-
-إذا لم تكن على دراية بتعديل نموذج باستخدام [`Trainer`], ألق نظرة على الدليل الأساسي [هنا](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilRoBERTa باستخدام [`AutoModelForMaskedLM`]:
-
-```py
->>> from transformers import AutoModelForMaskedLM
-
->>> model = AutoModelForMaskedLM.from_pretrained("distilbert/distilroberta-base")
-```
-
-في هذه المرحلة، تبقى ثلاث خطوات فقط:
-
-1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعلمة الوحيدة المطلوبة هي `output_dir` والتي تحدد مكان حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك).
-2. قم بتمرير معلمات التدريب إلى [`Trainer`] مع النموذج، ومجموعات البيانات، ومجمّع البيانات.
-3. قم باستدعاء [`~Trainer.train`] لتعديل نموذجك.
-
-```py
->>> training_args = TrainingArguments(
-...     output_dir="my_awesome_eli5_mlm_model",
-...     eval_strategy="epoch",
-...     learning_rate=2e-5,
-...     num_train_epochs=3,
-...     weight_decay=0.01,
-...     push_to_hub=True,
-... )
-
->>> trainer = Trainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=lm_dataset["train"],
-...     eval_dataset=lm_dataset["test"],
-...     data_collator=data_collator,
-...     tokenizer=tokenizer,
-... )
-
->>> trainer.train()
-```
-
-بمجرد اكتمال التدريب، استخدم طريقة [`~transformers.Trainer.evaluate`] لتقييم النموذج والحصول على مقياس
-    الحيرة:
-
-```py
->>> import math
-
->>> eval_results = trainer.evaluate()
->>> print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")
-Perplexity: 8.76
-```
-
-ثم شارك نموذجك على Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
-<Tip>
-
-إذا لم تكن على دراية بتعديل نموذج باستخدام Keras، ألق نظرة على الدليل الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-لتعديل نموذج في TensorFlow، ابدأ بإعداد دالة محسن، وجدول معدل التعلم، وبعض معلمات التدريب:
-
-```py
->>> from transformers import create_optimizer, AdamWeightDecay
-
->>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
-```
-
-ثم يمكنك تحميل DistilRoBERTa باستخدام [`TFAutoModelForMaskedLM`]:
-
-```py
->>> from transformers import TFAutoModelForMaskedLM
-
->>> model = TFAutoModelForMaskedLM.from_pretrained("distilbert/distilroberta-base")
-```
-
-قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> tf_train_set = model.prepare_tf_dataset(
-...     lm_dataset["train"],
-...     shuffle=True,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-
->>> tf_test_set = model.prepare_tf_dataset(
-...     lm_dataset["test"],
-...     shuffle=False,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-```
-
-قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن نماذج Transformers لديها جميعها دالة خسارة افتراضية ذات صلة بالمهمة، لذلك لا تحتاج إلى تحديد واحدة ما لم تكن تريد ذلك:
-
-```py
->>> import tensorflow as tf
-
->>> model.compile(optimizer=optimizer)  # لا توجد حجة للخسارة!
-```
-
-يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومعالج الرموز في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> callback = PushToHubCallback(
-...     output_dir="my_awesome_eli5_mlm_model",
-...     tokenizer=tokenizer,
-... )
-```
-
-أخيراً، أنت مستعد لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق، وعدد العصور، والتعليقات الخاصة بك لتعديل النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=[callback])
-```
-
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائياً إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-<Tip>
-
-لمثال أكثر تفصيلاً حول كيفية تعديل نموذج للنمذجة اللغوية المقنعة، ألق نظرة على الدفتر المقابل
-[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling.ipynb)
-أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/language_modeling-tf.ipynb).
-
-</Tip>
-
-## الاستدلال
-
-رائع، الآن بعد أن قمت بتعديل نموذج، يمكنك استخدامه للاستدلال!
-
-جهّز بعض النصوص التي تريد أن يملأ النموذج الفراغات فيها، واستخدم الرمز الخاص `<mask>` للإشارة إلى الفراغ:
-
-```py
->>> text = "The Milky Way is a <mask> galaxy."
-```
-
-أبسط طريقة لتجربة نموذجك المعدل للاستدلال هي استخدامه في [`pipeline`]. قم بإنشاء كائن  `pipeline` لملء الفراغ مع نموذجك، ومرر نصك إليه. إذا أردت، يمكنك استخدام معلمة `top_k` لتحديد عدد التنبؤات التي تريد إرجاعها:
-
-```py
->>> from transformers import pipeline
-
->>> mask_filler = pipeline("fill-mask", "username/my_awesome_eli5_mlm_model")
->>> mask_filler(text, top_k=3)
-[{'score': 0.5150994658470154,
-  'token': 21300,
-  'token_str': ' spiral',
-  'sequence': 'The Milky Way is a spiral galaxy.'},
- {'score': 0.07087188959121704,
-  'token': 2232,
-  'token_str': ' massive',
-  'sequence': 'The Milky Way is a massive galaxy.'},
- {'score': 0.06434620916843414,
-  'token': 650,
-  'token_str': ' small',
-  'sequence': 'The Milky Way is a small galaxy.'}]
-```
-
-<frameworkcontent>
-<pt>
-قم بتجزئة النص وإرجاع `input_ids` كمتجهات PyTorch. ستحتاج أيضًا إلى تحديد موضع رمز `<mask>`:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_mlm_model")
->>> inputs = tokenizer(text, return_tensors="pt")
->>> mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
-```
-
-قم بتمرير المدخلات إلى النموذج وإرجاع `logits` للرمز المقنع:
-
-```py
->>> from transformers import AutoModelForMaskedLM
-
->>> model = AutoModelForMaskedLM.from_pretrained("username/my_awesome_eli5_mlm_model")
->>> logits = model(**inputs).logits
->>> mask_token_logits = logits[0, mask_token_index, :]
-```
-
-ثم قم بإرجاع الرموز الثلاثة المقنعة ذات الاحتمالية الأعلى وطباعتها:
-
-```py
->>> top_3_tokens = torch.topk(mask_token_logits, 3, dim=1).indices[0].tolist()
-
->>> for token in top_3_tokens:
-...     print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))
-The Milky Way is a spiral galaxy.
-The Milky Way is a massive galaxy.
-The Milky Way is a small galaxy.
-```
-</pt>
-<tf>
-قم بتقسيم النص إلى رموز وإرجاع `input_ids` كـ TensorFlow tensors. ستحتاج أيضًا إلى تحديد موضع رمز `<mask>`:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_mlm_model")
->>> inputs = tokenizer(text, return_tensors="tf")
->>> mask_token_index = tf.where(inputs["input_ids"] == tokenizer.mask_token_id)[0, 1]
-```
-
-قم بتمرير المدخلات إلى النموذج وإرجاع `logits` للرمز المقنع:
-
-```py
->>> from transformers import TFAutoModelForMaskedLM
-
->>> model = TFAutoModelForMaskedLM.from_pretrained("username/my_awesome_eli5_mlm_model")
->>> logits = model(**inputs).logits
->>> mask_token_logits = logits[0, mask_token_index, :]
-```
-
-ثم قم بإرجاع الرموز الثلاثة المقنعة ذات الاحتمالية الأعلى وطباعتها:
-
-```py
->>> top_3_tokens = tf.math.top_k(mask_token_logits, 3).indices.numpy()
-
->>> for token in top_3_tokens:
-...     print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))
-The Milky Way is a spiral galaxy.
-The Milky Way is a massive galaxy.
-The Milky Way is a small galaxy.
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tasks/multiple_choice.md
+++ b/docs/source/ar/tasks/multiple_choice.md
@ -1,452 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# الاختيار من متعدد (Multiple choice)
-
-[[open-in-colab]]
-
-مهمة الاختيار من متعدد مشابهة لمهمة الإجابة على الأسئلة، ولكن مع توفير عدة إجابات محتملة مع سياق، ويُدرّب النموذج على تحديد الإجابة الصحيحة.
-
-سيوضح لك هذا الدليل كيفية:
-
-1. ضبط نموذج [BERT](https://huggingface.co/google-bert/bert-base-uncased)  باستخدام الإعداد `regular` لمجموعة بيانات [SWAG](https://huggingface.co/datasets/swag) لاختيار الإجابة الأفضل من بين الخيارات المتعددة المتاحة مع السياق.
-2. استخدام النموذج المضبوط للاستدلال.
-
-قبل البدء، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate
-```
-
-نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل نموذجك ومشاركته مع المجتمع. عند المطالبة، أدخل الرمز المميز الخاص بك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات SWAG
-
-ابدأ بتحميل تهيئة `regular` لمجموعة بيانات SWAG من مكتبة 🤗 Datasets:
-
-```py
->>> from datasets import load_dataset
-
->>> swag = load_dataset("swag", "regular")
-```
-
-ثم ألق نظرة على مثال:
-
-```py
->>> swag["train"][0]
-{'ending0': 'passes by walking down the street playing their instruments.',
- 'ending1': 'has heard approaching them.',
- 'ending2': "arrives and they're outside dancing and asleep.",
- 'ending3': 'turns the lead singer watches the performance.',
- 'fold-ind': '3416',
- 'gold-source': 'gold',
- 'label': 0,
- 'sent1': 'Members of the procession walk down the street holding small horn brass instruments.',
- 'sent2': 'A drum line',
- 'startphrase': 'Members of the procession walk down the street holding small horn brass instruments. A drum line',
- 'video-id': 'anetv_jkn6uvmqwh4'}
-```
-
-على الرغم من أن الحقول تبدو كثيرة، إلا أنها في الواقع بسيطة جداً:
-
- `sent1` و `sent2`: يعرض هذان الحقلان بداية الجملة، وبدمجهما معًا، نحصل على حقل `startphrase`.
- `ending`: يقترح نهاية محتملة للجملة، واحدة منها فقط هي الصحيحة.
- `label`: يحدد نهاية الجملة الصحيحة.
-
-## المعالجة المسبقة (Preprocess)
-
-الخطوة التالية هي استدعاء مُجزئ BERT لمعالجة بدايات الجمل والنهايات الأربع المحتملة:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
-```
-
-تحتاج دالة المعالجة المسبقة التي تريد إنشاءها إلى:
-
-1.  إنشاء أربع نسخ من حقل `sent1` ودمج كل منها مع `sent2` لإعادة إنشاء كيفية بدء الجملة.
-2. دمج `sent2` مع كل من نهايات الجمل الأربع المحتملة.
-3. تتجميع هاتين القائمتين لتتمكن من تجزئتهما، ثم إعادة ترتيبها بعد ذلك بحيث يكون لكل مثال حقول `input_ids` و `attention_mask` و `labels` مقابلة.
-
-
-```py
->>> ending_names = ["ending0", "ending1", "ending2", "ending3"]
-
->>> def preprocess_function(examples):
-...     first_sentences = [[context] * 4 for context in examples["sent1"]]
-...     question_headers = examples["sent2"]
-...     second_sentences = [
-...         [f"{header} {examples[end][i]}" for end in ending_names] for i, header in enumerate(question_headers)
-...     ]
-
-...     first_sentences = sum(first_sentences, [])
-...     second_sentences = sum(second_sentences, [])
-
-...     tokenized_examples = tokenizer(first_sentences, second_sentences, truncation=True)
-...     return {k: [v[i : i + 4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()}
-```
-
-لتطبيق دالة المعالجة المسبقة على مجموعة البيانات بأكملها، استخدم طريقة [`~datasets.Dataset.map`] الخاصة بـ 🤗 Datasets. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:
-
-```py
-tokenized_swag = swag.map(preprocess_function, batched=True)
-```
-
-لا يحتوي 🤗 Transformers على مجمع بيانات للاختيار من متعدد، لذلك ستحتاج إلى تكييف [`DataCollatorWithPadding`] لإنشاء دفعة من الأمثلة. من الأكفأ إضافة حشو (padding) ديناميكي للجمل إلى أطول طول في دفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الحد الأقصى للطول.
-
-يقوم `DataCollatorForMultipleChoice` بتجميع جميع مدخلات النموذج، ويطبق الحشو، ثم يعيد تجميع النتائج في شكلها الأصلي:
-
-<frameworkcontent>
-<pt>
-
-```py
->>> from dataclasses import dataclass
->>> from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
->>> from typing import Optional, Union
->>> import torch
-
->>> @dataclass
-... class DataCollatorForMultipleChoice:
-...     """
-...     Data collator that will dynamically pad the inputs for multiple choice received.
-...     """
-
-...     tokenizer: PreTrainedTokenizerBase
-...     padding: Union[bool, str, PaddingStrategy] = True
-...     max_length: Optional[int] = None
-...     pad_to_multiple_of: Optional[int] = None
-
-...     def __call__(self, features):
-...         label_name = "label" if "label" in features[0].keys() else "labels"
-...         labels = [feature.pop(label_name) for feature in features]
-...         batch_size = len(features)
-...         num_choices = len(features[0]["input_ids"])
-...         flattened_features = [
-...             [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
-...         ]
-...         flattened_features = sum(flattened_features, [])
-
-...         batch = self.tokenizer.pad(
-...             flattened_features,
-...             padding=self.padding,
-...             max_length=self.max_length,
-...             pad_to_multiple_of=self.pad_to_multiple_of,
-...             return_tensors="pt",
-...         )
-
-...         batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
-...         batch["labels"] = torch.tensor(labels, dtype=torch.int64)
-...         return batch
-```
-</pt>
-<tf>
- 
-```py
->>> from dataclasses import dataclass
->>> from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
->>> from typing import Optional, Union
->>> import tensorflow as tf
-
->>> @dataclass
-... class DataCollatorForMultipleChoice:
-...     """
-...     Data collator that will dynamically pad the inputs for multiple choice received.
-...     """
-
-...     tokenizer: PreTrainedTokenizerBase
-...     padding: Union[bool, str, PaddingStrategy] = True
-...     max_length: Optional[int] = None
-...     pad_to_multiple_of: Optional[int] = None
-
-...     def __call__(self, features):
-...         label_name = "label" if "label" in features[0].keys() else "labels"
-...         labels = [feature.pop(label_name) for feature in features]
-...         batch_size = len(features)
-...         num_choices = len(features[0]["input_ids"])
-...         flattened_features = [
-...             [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
-...         ]
-...         flattened_features = sum(flattened_features, [])
-
-...         batch = self.tokenizer.pad(
-...             flattened_features,
-...             padding=self.padding,
-...             max_length=self.max_length,
-...             pad_to_multiple_of=self.pad_to_multiple_of,
-...             return_tensors="tf",
-...         )
-
-...         batch = {k: tf.reshape(v, (batch_size, num_choices, -1)) for k, v in batch.items()}
-...         batch["labels"] = tf.convert_to_tensor(labels, dtype=tf.int64)
-...         return batch
-```
-</tf>
-</frameworkcontent>
-
-## التقييم (Evaluate)
-
-يُفضل غالبًا تضمين مقياس أثناء التدريب لتقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة باستخدام مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index). لهذه المهمة، قم بتحميل مقياس [الدقة](https://huggingface.co/spaces/evaluate-metric/accuracy) (انظر إلى [الجولة السريعة](https://huggingface.co/docs/evaluate/a_quick_tour) لـ 🤗 Evaluate لمعرفة المزيد حول كيفية تحميل المقياس وحسابه):
-
-```py
->>> import evaluate
-
->>> accuracy = evaluate.load("accuracy")
-```
-
-ثم أنشئ دالة لتمرير التنبؤات والتسميات إلى [`~evaluate.EvaluationModule.compute`] لحساب الدقة:
-
-```py
->>> import numpy as np
-
->>> def compute_metrics(eval_pred):
-...     predictions, labels = eval_pred
-...     predictions = np.argmax(predictions, axis=1)
-...     return accuracy.compute(predictions=predictions, references=labels)
-```
-
-دالتك `compute_metrics` جاهزة الآن، وستعود إليها عند إعداد تدريبك.
-
-## التدريب (Train)
-
-<frameworkcontent>
-<pt>
-
-<Tip>
-
-إذا لم تكن معتادًا على ضبط نموذج باستخدام [`Trainer`], فراجع الدرس الأساسي [هنا](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت جاهز لبدء تدريب نموذجك الآن! قم بتحميل BERT باستخدام [`AutoModelForMultipleChoice`]:
-
-```py
->>> from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
-
->>> model = AutoModelForMultipleChoice.from_pretrained("google-bert/bert-base-uncased")
-```
-
-في هذه المرحلة، تبقى ثلاث خطوات فقط:
-
-1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعلمة الوحيدة المطلوبة هي `output_dir` التي تحدد مكان حفظ نموذجك. ستدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب عليك تسجيل الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم [`Trainer`] بتقييم الدقة وحفظ نقطة فحص التدريب.
-2. مرر معلمات التدريب إلى [`Trainer`] جنبًا إلى جنب مع النموذج ومُجمِّع البيانات والمعالج ودالة تجميع البيانات ودالة `compute_metrics`.
-3. استدعي [`~Trainer.train`] لضبط نموذجك.
-
-```py
->>> training_args = TrainingArguments(
-...     output_dir="my_awesome_swag_model",
-...     eval_strategy="epoch",
-...     save_strategy="epoch",
-...     load_best_model_at_end=True,
-...     learning_rate=5e-5,
-...     per_device_train_batch_size=16,
-...     per_device_eval_batch_size=16,
-...     num_train_epochs=3,
-...     weight_decay=0.01,
-...     push_to_hub=True,
-... )
-
->>> trainer = Trainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=tokenized_swag["train"],
-...     eval_dataset=tokenized_swag["validation"],
-...     processing_class=tokenizer,
-...     data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),
-...     compute_metrics=compute_metrics,
-... )
-
->>> trainer.train()
-```
-
-بمجرد اكتمال التدريب، شارك نموذجك مع Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
-<Tip>
-
-إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فراجع الدرس الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن وجدول معدل التعلم وبعض معلمات التدريب:
-
-```py
->>> from transformers import create_optimizer
-
->>> batch_size = 16
->>> num_train_epochs = 2
->>> total_train_steps = (len(tokenized_swag["train"]) // batch_size) * num_train_epochs
->>> optimizer, schedule = create_optimizer(init_lr=5e-5, num_warmup_steps=0, num_train_steps=total_train_steps)
-```
-
-ثم يمكنك تحميل BERT باستخدام [`TFAutoModelForMultipleChoice`]:
-
-```py
->>> from transformers import TFAutoModelForMultipleChoice
-
->>> model = TFAutoModelForMultipleChoice.from_pretrained("google-bert/bert-base-uncased")
-```
-
-حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> data_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)
->>> tf_train_set = model.prepare_tf_dataset(
-...     tokenized_swag["train"],
-...     shuffle=True,
-...     batch_size=batch_size,
-...     collate_fn=data_collator,
-... )
-
->>> tf_validation_set = model.prepare_tf_dataset(
-...     tokenized_swag["validation"],
-...     shuffle=False,
-...     batch_size=batch_size,
-...     collate_fn=data_collator,
-... )
-```
-
-قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers تحتوي على دالة خسارة مناسبة للمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
-
-```py
->>> model.compile(optimizer=optimizer)  # لا توجد وسيطة خسارة!
-```
-
-الخطوتان الأخيرتان قبل بدء التدريب هما: حساب دقة التنبؤات، وتوفير طريقة لرفع النموذج إلى Hub. ويمكن تحقيق ذلك باستخدام [استدعاءات Keras](../main_classes/keras_callbacks)
-
-مرر دالتك `compute_metrics` إلى [`~transformers.KerasMetricCallback`]:
-
-```py
->>> from transformers.keras_callbacks import KerasMetricCallback
-
->>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
-```
-
-حدد مكان دفع نموذجك ومعالجك في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> push_to_hub_callback = PushToHubCallback(
-...     output_dir="my_awesome_model",
-...     tokenizer=tokenizer,
-... )
-```
-
-ثم قم بتضمين الاستدعاءات معًا:
-
-```py
->>> callbacks = [metric_callback, push_to_hub_callback]
-```
-
-أخيرًا، أنت جاهز لبدء تدريب نموذجك! استدعِ[`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة وعدد الحقب والاستدعاءات لضبط النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=2, callbacks=callbacks)
-```
-
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-<Tip>
-
-للحصول على مثال أكثر تعمقًا حول كيفية ضبط نموذج للاختيار من متعدد، ألق نظرة على [دفتر ملاحظات PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice.ipynb)
-أو [دفتر ملاحظات TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/multiple_choice-tf.ipynb) المقابل.
-
-</Tip>
-
-## الاستدلال  (Inference)
-
-رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
-
-قم بإنشاء نص واقتراح إجابتين محتملتين:
-
-```py
->>> prompt = "France has a bread law, Le Décret Pain, with strict rules on what is allowed in a traditional baguette."
->>> candidate1 = "The law does not apply to croissants and brioche."
->>> candidate2 = "The law applies to baguettes."
-```
-
-<frameworkcontent>
-<pt>
-قم بتحليل كل مطالبة وزوج إجابة مرشح وأعد تنسورات PyTorch. يجب عليك أيضًا إنشاء بعض `العلامات`:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_swag_model")
->>> inputs = tokenizer([[prompt, candidate1], [prompt, candidate2]], return_tensors="pt", padding=True)
->>> labels = torch.tensor(0).unsqueeze(0)
-```
-
-مرر مدخلاتك والعلامات إلى النموذج وأرجع`logits`:
-
-```py
->>> from transformers import AutoModelForMultipleChoice
-
->>> model = AutoModelForMultipleChoice.from_pretrained("username/my_awesome_swag_model")
->>> outputs = model(**{k: v.unsqueeze(0) for k, v in inputs.items()}, labels=labels)
->>> logits = outputs.logits
-```
-
-استخرج الفئة ذات الاحتمالية الأكبر:
-
-```py
->>> predicted_class = logits.argmax().item()
->>> predicted_class
-0
-```
-</pt>
-<tf>
-قم بتحليل كل مطالبة وزوج إجابة مرشح وأعد موترات TensorFlow:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_swag_model")
->>> inputs = tokenizer([[prompt, candidate1], [prompt, candidate2]], return_tensors="tf", padding=True)
-```
-
-مرر مدخلاتك إلى النموذج وأعد القيم logits:
-
-```py
->>> from transformers import TFAutoModelForMultipleChoice
-
->>> model = TFAutoModelForMultipleChoice.from_pretrained("username/my_awesome_swag_model")
->>> inputs = {k: tf.expand_dims(v, 0) for k, v in inputs.items()}
->>> outputs = model(inputs)
->>> logits = outputs.logits
-```
-
-استخرج الفئة ذات الاحتمالية الأكبر:
-
-```py
->>> predicted_class = int(tf.math.argmax(logits, axis=-1)[0])
->>> predicted_class
-0
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tasks/question_answering.md
+++ b/docs/source/ar/tasks/question_answering.md
@ -1,432 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-#  الإجابة على الأسئلة (Question answering)
-
-[[open-in-colab]]
-
-<Youtube id="ajPx5LwJD-I"/>
-
-تُقدّم مهام الإجابة على الأسئلة إجابةً بناءً على سؤال. إذا سبق لك أن سألت مساعدًا افتراضيًا مثل Alexa أو Siri أو Google عن حالة الطقس، فأنت قد استخدمت نموذج للإجابة على الأسئلة من قبل. هناك نوعان شائعان لمهام الإجابة على الأسئلة:
-
- الاستخراجية: استخراج الإجابة من السياق المحدد.
- التلخيصية: إنشاء إجابة من السياق تجيب على السؤال بشكل صحيح.
-
-سيوضح لك هذا الدليل كيفية:
-
-1. ضبط [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) على مجموعة بيانات [SQuAD](https://huggingface.co/datasets/squad) للإجابة على الأسئلة الاستخراجية.
-2. استخدام النموذج المضبوط للاستدلال.
-
-<Tip>
-
-لمشاهدة جميع الهياكل والنسخ المتوافقة مع هذه المهمة، نوصي بالرجوع إلى [صفحة المهمة](https://huggingface.co/tasks/question-answering)
-
-</Tip>
-
-قبل البدء، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate
-```
-
-نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل نموذجك ومشاركته مع المجتمع. عند المطالبة، أدخل الرمز المميز الخاص بك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات SQuAD
-
-ابدأ بتحميل جزء أصغر من مجموعة بيانات SQuAD من مكتبة 🤗 Datasets. سيتيح لك ذلك فرصة للتجربة والتحقق من عمل كل شيء بشكل صحيح قبل قضاء المزيد من الوقت في التدريب على مجموعة البيانات الكاملة.
-
-```py
->>> from datasets import load_dataset
-
->>> squad = load_dataset("squad", split="train[:5000]")
-```
-
-قم بتقسيم تقسيم `train` لمجموعة البيانات إلى مجموعة تدريب واختبار باستخدام طريقة [`~datasets.Dataset.train_test_split`]:
-
-```py
->>> squad = squad.train_test_split(test_size=0.2)
-```
-
-ثم ألق نظرة على مثال:
-
-```py
->>> squad["train"][0]
-{'answers': {'answer_start': [515], 'text': ['Saint Bernadette Soubirous']},
- 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
- 'id': '5733be284776f41900661182',
- 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
- 'title': 'University_of_Notre_Dame'
-}
-```
-
-هناك العديد من الحقول المهمة هنا:
-
- `answers`: موقع بداية الرمز المميز للإجابة ونص الإجابة.
- `context`: معلومات أساسية يحتاج النموذج إلى استخراج الإجابة منها.
- `question`: السؤال الذي يجب على النموذج الإجابة عليه.
-
-## المعالجة المسبقة (Preprocess)
-
-<Youtube id="qgaM0weJHpA"/>
-
-الخطوة التالية هي تحميل المحلل اللغوى DistilBERT لمعالجة حقلي `question` و `context`:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-هناك بعض خطوات المعالجة المسبقة الخاصة بمهام الإجابة على الأسئلة التي يجب أن تكون على دراية بها:
-
-1. قد تحتوي بعض الأمثلة في مجموعة البيانات على `context` طويلًا يتجاوز الحد الأقصى لطول مدخل النموذج. للتعامل مع النصوص الأطول، يتم اقتطاع `context` فقط عن طريق تعيين `truncation="only_second"`.
-2. بعد ذلك، يتم تحديد مواضع بداية ونهاية الإجابة في `context` الأصلي عن طريق تعيين
-   `return_offset_mapping=True`.
-3. باستخدام التعيين، يمكن الآن تحديد رموز بداية ونهاية الإجابة. استخدم طريقة [`~tokenizers.Encoding.sequence_ids`]
-   لتحديد أجزاء الإزاحة التي تتوافق مع `question` و `context`.
-
-فيما يلي كيفية إنشاء دالة لقص وتعيين رموز البداية والنهاية لـ `answer` إلى `context`:
-
-```py
->>> def preprocess_function(examples):
-...     questions = [q.strip() for q in examples["question"]]
-...     inputs = tokenizer(
-...         questions,
-...         examples["context"],
-...         max_length=384,
-...         truncation="only_second",
-...         return_offsets_mapping=True,
-...         padding="max_length",
-...     )
-
-...     offset_mapping = inputs.pop("offset_mapping")
-...     answers = examples["answers"]
-...     start_positions = []
-...     end_positions = []
-
-...     for i, offset in enumerate(offset_mapping):
-...         answer = answers[i]
-...         start_char = answer["answer_start"][0]
-...         end_char = answer["answer_start"][0] + len(answer["text"][0])
-...         sequence_ids = inputs.sequence_ids(i)
-
-...         # Find the start and end of the context
-...         idx = 0
-...         while sequence_ids[idx] != 1:
-...             idx += 1
-...         context_start = idx
-...         while sequence_ids[idx] == 1:
-...             idx += 1
-...         context_end = idx - 1
-
-...         # If the answer is not fully inside the context, label it (0, 0)
-...         if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
-...             start_positions.append(0)
-...             end_positions.append(0)
-...         else:
-...             # Otherwise it's the start and end token positions
-...             idx = context_start
-...             while idx <= context_end and offset[idx][0] <= start_char:
-...                 idx += 1
-...             start_positions.append(idx - 1)
-
-...             idx = context_end
-...             while idx >= context_start and offset[idx][1] >= end_char:
-...                 idx -= 1
-...             end_positions.append(idx + 1)
-
-...     inputs["start_positions"] = start_positions
-...     inputs["end_positions"] = end_positions
-...     return inputs
-```
-
-لتطبيق المعالجة المسبقة على كامل مجموعة البيانات، استخدم [`~datasets.Dataset.map`] من مكتبة 🤗 Datasets. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات دفعة واحدة. قم بإزالة أي أعمدة لا تحتاجها:
-
-```py
->>> tokenized_squad = squad.map(preprocess_function, batched=True, remove_columns=squad["train"].column_names)
-```
-
-الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DefaultDataCollator`]. بخلاف مجمّعات البيانات الأخرى في 🤗 Transformers، لا يطبق [`DefaultDataCollator`] أي معالجة مسبقة إضافية مثل الحشو.
-
-<frameworkcontent>
-<pt>
- 
-```py
->>> from transformers import DefaultDataCollator
-
->>> data_collator = DefaultDataCollator()
-```
-</pt>
-<tf>
- 
-```py
->>> from transformers import DefaultDataCollator
-
->>> data_collator = DefaultDataCollator(return_tensors="tf")
-```
-</tf>
-</frameworkcontent>
-
-## التدريب (Train)
-
-<frameworkcontent>
-<pt>
-
-<Tip>
-
-إذا لم تكن معتادًا على ضبط نموذج باستخدام [`Trainer`], ألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت جاهز لبدء تدريب نموذجك الآن! قم بتحميل DistilBERT باستخدام [`AutoModelForQuestionAnswering`]:
-
-```py
->>> from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer
-
->>> model = AutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-في هذه المرحلة، تبقى ثلاث خطوات فقط:
-
-1. حدد المعاملات الفائقة للتدريب في [`TrainingArguments`]. المعامل الوحيد المطلوب هو `output_dir` الذي يحدد مكان حفظ نموذجك. ستدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب عليك تسجيل الدخول إلى Hugging Face لتحميل نموذجك).
-2. مرر معاملات التدريب إلى [`Trainer`] جنبًا إلى جنب مع النموذج، ومجموعة البيانات، والمُحلّل النصي، ومُجمّع البيانات.
-3. استدعِ ـ [`~Trainer.train`] لضبط النموذج.
-
-```py
->>> training_args = TrainingArguments(
-...     output_dir="my_awesome_qa_model",
-...     eval_strategy="epoch",
-...     learning_rate=2e-5,
-...     per_device_train_batch_size=16,
-...     per_device_eval_batch_size=16,
-...     num_train_epochs=3,
-...     weight_decay=0.01,
-...     push_to_hub=True,
-... )
-
->>> trainer = Trainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=tokenized_squad["train"],
-...     eval_dataset=tokenized_squad["test"],
-...     processing_class=tokenizer,
-...     data_collator=data_collator,
-... )
-
->>> trainer.train()
-```
-
-بمجرد اكتمال التدريب، شارك نموذجك في Hub باستخدام الدالة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
- 
-<Tip>
-
-إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن، وجدول معدل التعلم، وبعض المعاملات الفائقة للتدريب:
-
-```py
->>> from transformers import create_optimizer
-
->>> batch_size = 16
->>> num_epochs = 2
->>> total_train_steps = (len(tokenized_squad["train"]) // batch_size) * num_epochs
->>> optimizer, schedule = create_optimizer(
-...     init_lr=2e-5,
-...     num_warmup_steps=0,
-...     num_train_steps=total_train_steps,
-... )
-```
-
-ثم يمكنك تحميل DistilBERT باستخدام [`TFAutoModelForQuestionAnswering`]:
-
-```py
->>> from transformers import TFAutoModelForQuestionAnswering
-
->>> model = TFAutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> tf_train_set = model.prepare_tf_dataset(
-...     tokenized_squad["train"],
-...     shuffle=True,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-
->>> tf_validation_set = model.prepare_tf_dataset(
-...     tokenized_squad["test"],
-...     shuffle=False,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-```
-
-قم بتكوين النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method):
-
-```py
->>> import tensorflow as tf
-
->>> model.compile(optimizer=optimizer)
-```
-
-آخر شيء يجب إعداده قبل بدء التدريب هو توفير طريقة لدفع نموذجك إلى Hub. يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومعالجك المعجمي في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> callback = PushToHubCallback(
-...     output_dir="my_awesome_qa_model",
-...     tokenizer=tokenizer,
-... )
-```
-
-أخيرًا، أنت جاهز لبدء تدريب نموذجك! اتصل بـ [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة، وعدد العهود، ومعاودة الاتصال الخاصة بك لضبط النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=[callback])
-```
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-
-<Tip>
-
-للحصول على مثال أكثر تعمقًا حول كيفية ضبط نموذج للإجابة على الأسئلة، ألق نظرة على [دفتر ملاحظات PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering.ipynb) المقابل
-أو [دفتر ملاحظات TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/question_answering-tf.ipynb).
-
-</Tip>
-
-## التقييم (Evaluate)
-
-يتطلب التقييم للإجابة على الأسئلة قدرًا كبيرًا من المعالجة اللاحقة. لتوفير وقتك، يتخطى هذا الدليل خطوة التقييم. لا يزال [`Trainer`] يحسب خسارة التقييم أثناء التدريب، مما يعني أنك لست تجهل تمامًا أداء نموذجك.
-
-إذا كان لديك المزيد من الوقت وتهتم بكيفية تقييم نموذجك للإجابة على الأسئلة، فألق نظرة على فصل [الإجابة على الأسئلة](https://huggingface.co/course/chapter7/7?fw=pt#post-processing) من دورة 🤗 Hugging Face!
-
-## الاستدلال (Inference)
-
-رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
-
-حدد سؤالًا وسياقًا ليقوم النموذج بالتنبؤ بالإجابة عليه:
-
-```py
->>> question = "How many programming languages does BLOOM support?"
->>> context = "BLOOM has 176 billion parameters and can generate text in 46 languages natural languages and 13 programming languages."
-```
-
-أبسط طريقة لتجربة نموذجك المُدرَّب للاستدلال هي استخدامه في [`pipeline`]. قم بإنشاء كائن لـ `pipeline` للإجابة على الأسئلة باستخدام نموذجك، ومرِّر النص إليه:
-
-```py
->>> from transformers import pipeline
-
->>> question_answerer = pipeline("question-answering", model="my_awesome_qa_model")
->>> question_answerer(question=question, context=context)
-{'score': 0.2058267742395401,
- 'start': 10,
- 'end': 95,
- 'answer': '176 مليار معامل ويمكنه إنشاء نصوص بـ 46 لغة طبيعية و 13'}
-```
-
-يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
-
-<frameworkcontent>
-<pt>
- 
- قسّم النص وأرجع تنسورات PyTorch:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
->>> inputs = tokenizer(question, context, return_tensors="pt")
-```
-
-مرر مدخلاتك إلى النموذج وأرجع `logits`:
-
-```py
->>> import torch
->>> from transformers import AutoModelForQuestionAnswering
-
->>> model = AutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
->>> with torch.no_grad():
-...     outputs = model(**inputs)
-```
-
-احصل على أعلى احتمال من مخرجات النموذج لموضعي البداية والنهاية:
-
-```py
->>> answer_start_index = outputs.start_logits.argmax()
->>> answer_end_index = outputs.end_logits.argmax()
-```
-
-استخلاص الإجابة من الرموز المتوقعة:
-
-```py
->>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
->>> tokenizer.decode(predict_answer_tokens)
-'176 billion parameters and can generate text in 46 languages natural languages and 13'
-```
-</pt>
-<tf>
-قم بتحليل النص المعجمي وأعد موترات TensorFlow:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
->>> inputs = tokenizer(question, context, return_tensors="tf")
-```
-
-مرر مدخلاتك إلى النموذج وأعد `logits`:
-
-```py
->>> from transformers import TFAutoModelForQuestionAnswering
-
->>> model = TFAutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
->>> outputs = model(**inputs)
-```
-
-احصل على أعلى احتمال من مخرجات النموذج لموضعي البداية والنهاية:
-
-```py
->>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
->>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
-```
-
-استخلاص الإجابة من الرموز المتوقعة:
-
-```py
->>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
->>> tokenizer.decode(predict_answer_tokens)
-'176 billion parameters and can generate text in 46 languages natural languages and 13'
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tasks/sequence_classification.md
+++ b/docs/source/ar/tasks/sequence_classification.md
@ -1,387 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-->
-
-# تصنيف النص(Text classification)
-
-[[open-in-colab]]
-
-<Youtube id="leNG9fN9FQU"/>
-
-تصنيف النص هو مهمة NLP شائعة حيث يُعيّن تصنيفًا أو فئة للنص. تستخدم بعض أكبر الشركات تصنيف النصوص في الإنتاج لمجموعة واسعة من التطبيقات العملية. أحد أكثر أشكال تصنيف النص شيوعًا هو تحليل المشاعر، والذي يقوم بتعيين تسمية مثل 🙂 إيجابية، 🙁 سلبية، أو 😐 محايدة لتسلسل نصي.
-
-سيوضح لك هذا الدليل كيفية:
-
-1. ضبط [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) على مجموعة بيانات [IMDb](https://huggingface.co/datasets/imdb) لتحديد ما إذا كانت مراجعة الفيلم إيجابية أو سلبية.
-2. استخدام نموذج الضبط الدقيق للتنبؤ.
-
-<Tip>
-
-لرؤية جميع البنى ونقاط التحقق المتوافقة مع هذه المهمة، نوصي بالتحقق من [صفحة المهمة](https://huggingface.co/tasks/text-classification).
-
-</Tip>
-
-قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate accelerate
-```
-
-نحن نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عند المطالبة، أدخل رمزك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات IMDb
-
-ابدأ بتحميل مجموعة بيانات IMDb من مكتبة 🤗 Datasets:
-
-```py
->>> from datasets import load_dataset
-
->>> imdb = load_dataset("imdb")
-```
-
-ثم ألق نظرة على مثال:
-
-```py
->>> imdb["test"][0]
-{
-    "label": 0,
-    "text": "I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \"Gene Roddenberry's Earth...\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again.",
-}
-```
-
-هناك حقولان في هذه المجموعة من البيانات:
-
- `text`: نص مراجعة الفيلم.
- `label`: قيمة إما `0` لمراجعة سلبية أو `1` لمراجعة إيجابية.
-
-## المعالجة المسبقة(Preprocess)
-
-الخطوة التالية هي تحميل المُجزِّئ النص DistilBERT لتهيئة لحقل `text`:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-أنشئ دالة لتهيئة حقل `text` وتقصير السلاسل النصية بحيث لا يتجاوز طولها الحد الأقصى لإدخالات DistilBERT:
-
-```py
->>> def preprocess_function(examples):
-...     return tokenizer(examples["text"], truncation=True)
-```
-
-لتطبيق دالة التهيئة على مجموعة البيانات بأكملها، استخدم دالة 🤗 Datasets [`~datasets.Dataset.map`] . يمكنك تسريع `map` باستخدام `batched=True` لمعالجة دفعات من البيانات:
-
-```py
-tokenized_imdb = imdb.map(preprocess_function, batched=True)
-```
-
-الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorWithPadding`].  الأكثر كفاءة هو استخدام الحشو الديناميكي لجعل الجمل متساوية في الطول داخل كل دفعة، بدلًا من حشو كامل البيانات إلى الحد الأقصى للطول.
-
-<frameworkcontent>
-<pt>
-
-```py
->>> from transformers import DataCollatorWithPadding
-
->>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
-```
-</pt>
-<tf>
-
-```py
->>> from transformers import DataCollatorWithPadding
-
->>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
-```
-</tf>
-</frameworkcontent>
-
-## التقييم(Evaluate)
-
-يُعدّ تضمين مقياس أثناء التدريب مفيدًا لتقييم أداء النموذج. يمكنك تحميل طريقة تقييم بسرعة باستخدام مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index) . بالنسبة لهذه المهمة، قم بتحميل مقياس [الدقة](https://huggingface.co/spaces/evaluate-metric/accuracy) (راجع جولة 🤗 Evaluate [السريعة](https://huggingface.co/docs/evaluate/a_quick_tour) لمعرفة المزيد حول كيفية تحميل وحساب مقياس):
-
-```py
->>> import evaluate
-
->>> accuracy = evaluate.load("accuracy")
-```
-
-ثم أنشئ دالة تقوم بتمرير تنبؤاتك وتصنيفاتك إلى [`~evaluate.EvaluationModule.compute`] لحساب الدقة:
-
-```py
->>> import numpy as np
-
->>> def compute_metrics(eval_pred):
-...     predictions, labels = eval_pred
-...     predictions = np.argmax(predictions, axis=1)
-...     return accuracy.compute(predictions=predictions, references=labels)
-```
-
-دالة `compute_metrics` جاهزة الآن، وستعود إليها عند إعداد التدريب.
-
-## التدريب(Train)
-
-قبل أن تبدأ في تدريب نموذجك، قم بإنشاء خريطة من المعرفات المتوقعة إلى تسمياتها باستخدام `id2label` و `label2id`:
-
-```py
->>> id2label = {0: "NEGATIVE", 1: "POSITIVE"}
->>> label2id = {"NEGATIVE": 0, "POSITIVE": 1}
-```
-
-<frameworkcontent>
-<pt>
-<Tip>
-
-إذا لم تكن على دراية بضبط نموذج دقيق باستخدام [`Trainer`], فالق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilBERT مع [`AutoModelForSequenceClassification`] جنبًا إلى جنب مع عدد التصنيفات المتوقعة، وتصنيفات الخرائط:
-
-```py
->>> from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
-
->>> model = AutoModelForSequenceClassification.from_pretrained(
-...     "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
-... )
-```
-
-في هذه المرحلة، هناك ثلاث خطوات فقط متبقية:
-
-1.  حدد مُعامِلات التدريب في [`TrainingArguments`]. المُعامل المطلوب الوحيد هو `output_dir`، لتحديد مكان حفظ النموذج. يمكنك رفع النموذج إلى Hub بتعيين `push_to_hub=True` (يجب تسجيل الدخول إلى Hugging Face لرفع النموذج). سيقوم `Trainer` بتقييم الدقة وحفظ نقاط التحقق في نهاية كل حقبة.
-2.  مرر مُعامِلات التدريب إلى `Trainer` مع النموذج، ومجموعة البيانات، والمحلل اللغوي، ومُجمِّع البيانات، ووظيفة `compute_metrics`.
-3.  استدعِ [`~Trainer.train`] لضبط النموذج.
-
-```py
->>> training_args = TrainingArguments(
-...     output_dir="my_awesome_model",
-...     learning_rate=2e-5,
-...     per_device_train_batch_size=16,
-...     per_device_eval_batch_size=16,
-...     num_train_epochs=2,
-...     weight_decay=0.01,
-...     eval_strategy="epoch",
-...     save_strategy="epoch",
-...     load_best_model_at_end=True,
-...     push_to_hub=True,
-... )
-
->>> trainer = Trainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=tokenized_imdb["train"],
-...     eval_dataset=tokenized_imdb["test"],
-...     processing_class=tokenizer,
-...     data_collator=data_collator,
-...     compute_metrics=compute_metrics,
-... )
-
->>> trainer.train()
-```
-
-<Tip>
-
-يستخدم [`Trainer`] الحشو الديناميكي افتراضيًا عند تمرير `tokenizer` إليه. في هذه الحالة،  لا تحتاج لتحديد مُجمِّع البيانات صراحةً. 
-
-</Tip>
-
-بعد اكتمال التدريب، شارك نموذجك على Hub باستخدام الطريقة [`~transformers.Trainer.push_to_hub`] ليستخدمه الجميع:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
-<Tip>
-
-إذا لم تكن على دراية بضبط نموذج باستخدام Keras، قم بالاطلاع على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-لضبط نموذج في TensorFlow، ابدأ بإعداد دالة المحسن، وجدول معدل التعلم، وبعض معلمات التدريب:
-
-```py
->>> from transformers import create_optimizer
->>> import tensorflow as tf
-
->>> batch_size = 16
->>> num_epochs = 5
->>> batches_per_epoch = len(tokenized_imdb["train"]) // batch_size
->>> total_train_steps = int(batches_per_epoch * num_epochs)
->>> optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps)
-```
-
-ثم يمكنك تحميل DistilBERT مع [`TFAutoModelForSequenceClassification`] بالإضافة إلى عدد التصنيفات المتوقعة، وتعيينات التسميات:
-
-```py
->>> from transformers import TFAutoModelForSequenceClassification
-
->>> model = TFAutoModelForSequenceClassification.from_pretrained(
-...     "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
-... )
-```
-
-قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> tf_train_set = model.prepare_tf_dataset(
-...     tokenized_imdb["train"],
-...     shuffle=True,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-
->>> tf_validation_set = model.prepare_tf_dataset(
-...     tokenized_imdb["test"],
-...     shuffle=False,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-```
-
-قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers لديها دالة خسارة ذات صلة بالمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
-
-```py
->>> import tensorflow as tf
-
->>> model.compile(optimizer=optimizer)  # No loss argument!
-```
-
-آخر أمرين يجب إعدادهما قبل بدء التدريب هو حساب الدقة من التوقعات، وتوفير طريقة لدفع نموذجك إلى Hub. يتم ذلك باستخدام [Keras callbacks](../main_classes/keras_callbacks).
-
-قم بتمرير دالة `compute_metrics` الخاصة بك إلى [`~transformers.KerasMetricCallback`]:
-
-```py
->>> from transformers.keras_callbacks import KerasMetricCallback
-
->>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
-```
-
-حدد مكان دفع نموذجك والمجزئ اللغوي في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> push_to_hub_callback = PushToHubCallback(
-...     output_dir="my_awesome_model",
-...     tokenizer=tokenizer,
-... )
-```
-
-ثم اجمع الاستدعاءات معًا:
-
-```py
->>> callbacks = [metric_callback, push_to_hub_callback]
-```
-
-أخيرًا، أنت مستعد لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق، وعدد الحقبات، واستدعاءاتك لضبط النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)
-```
-
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-<Tip>
-
-للحصول على مثال أكثر عمقًا حول كيفية ضبط نموذج لتصنيف النصوص، قم بالاطلاع على الدفتر المقابل
-[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb)
-أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification-tf.ipynb).
-
-</Tip>
-
-## الاستدلال(Inference)
-
-رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
-
-احصل على بعض النصوص التي ترغب في إجراء الاستدلال عليها:
-
-```py
->>> text = "This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end. Might be my favorite of the three."
-```
-
-أسهل طريقة لتجربة النموذج المضبوط للاستدلال هي استخدامه ضمن [`pipeline`]. قم بإنشاء `pipeline` لتحليل المشاعر مع نموذجك، ومرر نصك إليه:
-
-```py
->>> from transformers import pipeline
-
->>> classifier = pipeline("sentiment-analysis", model="stevhliu/my_awesome_model")
->>> classifier(text)
-[{'label': 'POSITIVE', 'score': 0.9994940757751465}]
-```
-
-يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
-
-<frameworkcontent>
-<pt>
-قم يتجزئة النص وإرجاع تنسورات PyTorch:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
->>> inputs = tokenizer(text, return_tensors="pt")
-```
-
-مرر المدخلات إلى النموذج واسترجع `logits`:
-
-```py
->>> from transformers import AutoModelForSequenceClassification
-
->>> model = AutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")
->>> with torch.no_grad():
-...     logits = model(**inputs).logits
-```
-
-استخرج الفئة ذات الاحتمالية الأعلى، واستخدم `id2label` لتحويلها إلى تصنيف نصي:
-
-```py
->>> predicted_class_id = logits.argmax().item()
->>> model.config.id2label[predicted_class_id]
-'POSITIVE'
-```
-</pt>
-<tf>
-قم بتحليل النص وإرجاع تنسيقات TensorFlow:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
->>> inputs = tokenizer(text, return_tensors="tf")
-```
-
-قم بتمرير مدخلاتك إلى النموذج وإرجاع `logits`:
-
-```py
->>> from transformers import TFAutoModelForSequenceClassification
-
->>> model = TFAutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")
->>> logits = model(**inputs).logits
-```
-
-استخرج الفئة ذات الاحتمالية الأعلى، واستخدم `id2label` لتحويلها إلى تصنيف نصي:
-
-```py
->>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
->>> model.config.id2label[predicted_class_id]
-'POSITIVE'
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tasks/summarization.md
+++ b/docs/source/ar/tasks/summarization.md
--- a/docs/source/ar/tasks/token_classification.md
+++ b/docs/source/ar/tasks/token_classification.md
@ -1,550 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-	Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-	the License. You may obtain a copy of the License at
-	http://www.apache.org/licenses/LICENSE-2.0
-	Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-	an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-	specific language governing permissions and limitations under the License.
-	⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-	rendered properly in your Markdown viewer.
-	-->
-
-# تصنيف الرموز(Token classification)
-
-[[open-in-colab]]
-
-<Youtube id="wVHdVlPScxA"/>
-
-يهدف تصنيف الرموز إلى إعطاء تسمية لكل رمز على حدة في الجملة. من أكثر مهام تصنيف الرموز شيوعًا هو التعرف على الكيانات المسماة (NER). يحاول NER تحديد تسمية لكل كيان في الجملة، مثل شخص، أو مكان، أو منظمة. 
-
-سيوضح لك هذا الدليل كيفية:
-
-1. ضبط [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) على مجموعة بيانات [WNUT 17](https://huggingface.co/datasets/wnut_17) للكشف عن كيانات جديدة.
-2.  استخدام نموذجك المضبوط بدقة للاستدلال.
-
-<Tip>
-
-للاطلاع جميع البنى والنقاط المتوافقة مع هذه المهمة، نوصي بالرجوع من [صفحة المهمة](https://huggingface.co/tasks/token-classification).
-
-</Tip>
-
-قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate seqeval
-```
-
-نحن نشجعك على تسجيل الدخول إلى حساب HuggingFace الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عندما يُطلب منك، أدخل رمزك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات WNUT 17
-
-ابدأ بتحميل مجموعة بيانات WNUT 17 من مكتبة 🤗 Datasets:
-
-```py
->>> from datasets import load_dataset
-
->>> wnut = load_dataset("wnut_17")
-```
-
-ثم ألق نظرة على مثال:
-
-```py
->>> wnut["train"][0]
-{'id': '0',
- 'ner_tags': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0],
- 'tokens': ['@paulwalk', 'It', "'s", 'the', 'view', 'from', 'where', 'I', "'m", 'living', 'for', 'two', 'weeks', '.', 'Empire', 'State', 'Building', '=', 'ESB', '.', 'Pretty', 'bad', 'storm', 'here', 'last', 'evening', '.']
-}
-```
-
-يمثل كل رقم في `ner_tags` كياناً. حوّل الأرقام إلى أسماء التصنيفات لمعرفة ماهية الكيانات:
-
-```py
->>> label_list = wnut["train"].features[f"ner_tags"].feature.names
->>> label_list
-[
-    "O",
-    "B-corporation",
-    "I-corporation",
-    "B-creative-work",
-    "I-creative-work",
-    "B-group",
-    "I-group",
-    "B-location",
-    "I-location",
-    "B-person",
-    "I-person",
-    "B-product",
-    "I-product",
-]
-```
-
-يشير الحرف الذي يسبق كل `ner_tag` إلى موضع الرمز للكيان:
-
- `B-` يشير إلى بداية الكيان.
- `I-` يشير إلى أن الرمز يقع ضمن نفس الكيان (على سبيل المثال، الرمز `State` هو جزء من كيان مثل `Empire State Building`).
- `0` يشير إلى أن الرمز لا يمثل أي كيان.
-
-## المعالجة المسبقة(Preprocess)
-
-<Youtube id="iY2AZYdZAr0"/>
-
-الخطوة التالية هي تحميل مُجزِّئ النصوص DistilBERT للمعالجة المسبقة لحقل `tokens`:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")
-```
-
-كما رأيت في حقل `tokens` المثال أعلاه، يبدو أن المدخل قد تم تحليله بالفعل. لكن المدخل  لم يُجزأ بعد ويتعيّن عليك ضبط `is_split_into_words=True` لتقسيم الكلمات إلى كلمات فرعية. على سبيل المثال:
-
-```py
->>> example = wnut["train"][0]
->>> tokenized_input = tokenizer(example["tokens"], is_split_into_words=True)
->>> tokens = tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"])
->>> tokens
-['[CLS]', '@', 'paul', '##walk', 'it', "'", 's', 'the', 'view', 'from', 'where', 'i', "'", 'm', 'living', 'for', 'two', 'weeks', '.', 'empire', 'state', 'building', '=', 'es', '##b', '.', 'pretty', 'bad', 'storm', 'here', 'last', 'evening', '.', '[SEP]']
-```
-
-ومع ذلك، يضيف هذا بعض الرموز الخاصة `[CLS]` و`[SEP]` وتقسيم الكلمات إلى أجزاء يُنشئ عدم تطابق بين المُدخلات والتسميات. قد يتم تقسيم كلمة واحدة تقابل تسمية واحدة الآن إلى كلمتين فرعيتين. ستحتاج إلى إعادة محاذاة الرموز والتسميات عن طريق:
-
-1. ربط كل رمز بالكلمة الأصلية باستخدام الخاصية [`word_ids`](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.BatchEncoding.word_ids).
-2. تعيين التسمية `-100` للرموز الخاصة `[CLS]` و`[SEP]` بحيث يتم تجاهلها بواسطة دالة الخسارة PyTorch (انظر [CrossEntropyLoss](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html)).
-3. تسمية الرمز الأول فقط لكلمة معينة. قم بتعيين `-100` لأجزاء الكلمة الأخرى.
-
-هنا كيف يمكنك إنشاء وظيفة لإعادة محاذاة الرموز والتسميات، وقص الجمل لتتجاوز الحد الأقصى لطول مُدخلات DistilBERT:
-
-```py
->>> def tokenize_and_align_labels(examples):
-...     tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
-
-...     labels = []
-...     for i, label in enumerate(examples[f"ner_tags"]):
-...         word_ids = tokenized_inputs.word_ids(batch_index=i)  # تعيين الرموز إلى كلماتهم المقابلة.
-...         previous_word_idx = None
-...         label_ids = []
-...         for word_idx in word_ids:  # تعيين الرموز الخاصة إلى -100.
-...             if word_idx is None:
-...                 label_ids.append(-100)
-...             elif word_idx != previous_word_idx:  # تسمية الرمز الأول فقط لكلمة معينة.
-...                 label_ids.append(label[word_idx])
-...             else:
-...                 label_ids.append(-100)
-...             previous_word_idx = word_idx
-...         labels.append(label_ids)
-
-...     tokenized_inputs["labels"] = labels
-...     return tokenized_inputs
-```
-
-لتطبيق هذه العملية على كامل مجموعة البيانات، استخدم الدالة [`~datasets.Dataset.map`] لمجموعة بيانات 🤗. يمكنك تسريع الدالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:
-
-```py
->>> tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)
-```
-
-الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorWithPadding`].من الأفضل استخدام *الحشو الديناميكي* للجمل إلى أطول طول في دفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بالكامل إلى الطول الأقصى.
-
-<frameworkcontent>
-<pt>
-```py
->>> from transformers import DataCollatorForTokenClassification
-
->>> data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
-```
-</pt>
-<tf>
-```py
->>> from transformers import DataCollatorForTokenClassification
-
->>> data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
-```
-</tf>
-</frameworkcontent>
-
-## التقييم(Evaluate)
-
-يُعدّ تضمين مقياس أثناء التدريب مفيدًا في تقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة مع مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index). لهذه المهمة، قم بتحميل إطار [seqeval](https://huggingface.co/spaces/evaluate-metric/seqeval) (انظر جولة 🤗 Evaluate [quick tour](https://huggingface.co/docs/evaluate/a_quick_tour) لمعرفة المزيد حول كيفية تحميل وحساب مقياس). يُخرج seqeval عدة نتائج: الدقة، والاستذكار، ومقياس F1، والدقة.
-
-```py
->>> import evaluate
-
->>> seqeval = evaluate.load("seqeval")
-```
-
-احصل على تسميات الكيانات المسماة (NER) أولاً،ثم أنشئ دالة تُمرر تنبؤاتك وتسمياتك الصحيحة إلى [`~evaluate.EvaluationModule.compute`] لحساب النتائج:
-
-```py
->>> import numpy as np
-
->>> labels = [label_list[i] for i in example[f"ner_tags"]]
-
->>> def compute_metrics(p):
-...     predictions, labels = p
-...     predictions = np.argmax(predictions, axis=2)
-
-...     true_predictions = [
-...         [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
-...         for prediction, label in zip(predictions, labels)
-...     ]
-...     true_labels = [
-...         [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
-...         for prediction, label in zip(predictions, labels)
-...     ]
-
-...     results = seqeval.compute(predictions=true_predictions, references=true_labels)
-...     return {
-...         "precision": results["overall_precision"],
-...         "recall": results["overall_recall"],
-...         "f1": results["overall_f1"],
-...         "accuracy": results["overall_accuracy"],
-...     }
-```
-
-دالة `compute_metrics` جاهزة للاستخدام، وستحتاج إليها عند إعداد التدريب.
-
-## التدريب(Train)
-
-قبل تدريب النموذج، جهّز خريطة تربط بين المعرّفات المتوقعة وتسمياتها باستخدام `id2label` و `label2id`:
-
-```py
->>> id2label = {
-...     0: "O",
-...     1: "B-corporation",
-...     2: "I-corporation",
-...     3: "B-creative-work",
-...     4: "I-creative-work",
-...     5: "B-group",
-...     6: "I-group",
-...     7: "B-location",
-...     8: "I-location",
-...     9: "B-person",
-...     10: "I-person",
-...     11: "B-product",
-...     12: "I-product",
-... }
->>> label2id = {
-...     "O": 0,
-...     "B-corporation": 1,
-...     "I-corporation": 2,
-...     "B-creative-work": 3,
-...     "I-creative-work": 4,
-...     "B-group": 5,
-...     "I-group": 6,
-...     "B-location": 7,
-...     "I-location": 8,
-...     "B-person": 9,
-...     "I-person": 10,
-...     "B-product": 11,
-...     "I-product": 12,
-... }
-```
-
-<frameworkcontent>
-<pt>
-<Tip>
-
-إذا لم تكن على دراية بتعديل نموذج باستخدام [`Trainer`], ألق نظرة على الدليل التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilBERT مع [`AutoModelForTokenClassification`] إلى جانب عدد التصنيفات المتوقعة، وخريطة التسميات:
-
-```py
->>> from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
-
->>> model = AutoModelForTokenClassification.from_pretrained(
-...     "distilbert/distilbert-base-uncased", num_labels=13, id2label=id2label, label2id=label2id
-... )
-```
-
-في هذه المرحلة، هناك ثلاث خطوات فقط متبقية:
-
-1. حدد معلمات التدريب الخاصة بك في [`TrainingArguments`]. المعامل الوحيد المطلوب هو `output_dir` الذي يحدد مكان حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم [`Trainer`] بتقييم درجات seqeval وحفظ تسخة التدريب.
-2. قم بتمرير معاملات التدريب إلى [`Trainer`] إلى جانب النموذج، ومجموعة البيانات، والمُجزِّئ اللغوي، و`data collator`، ودالة `compute_metrics`.
-3.استدعِ [`~Trainer.train`] لتدريب نموذجك.
-
-```py
->>> training_args = TrainingArguments(
-...     output_dir="my_awesome_wnut_model",
-...     learning_rate=2e-5,
-...     per_device_train_batch_size=16,
-...     per_device_eval_batch_size=16,
-...     num_train_epochs=2,
-...     weight_decay=0.01,
-...     eval_strategy="epoch",
-...     save_strategy="epoch",
-...     load_best_model_at_end=True,
-...     push_to_hub=True,
-... )
-
->>> trainer = Trainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=tokenized_wnut["train"],
-...     eval_dataset=tokenized_wnut["test"],
-...     processing_class=tokenizer,
-...     data_collator=data_collator,
-...     compute_metrics=compute_metrics,
-... )
-
->>> trainer.train()
-```
-
-بمجرد اكتمال التدريب، شارك نموذجك على Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
-<Tip>
-
-إذا لم تكن على دراية بتعديل نموذج باستخدام Keras، ألق نظرة على الدليل التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-للتعديل على نموذج في TensorFlow، ابدأ بإعداد دالة محسن، وجدول معدل التعلم، وبعض معلمات التدريب:
-
-```py
->>> from transformers import create_optimizer
-
->>> batch_size = 16
->>> num_train_epochs = 3
->>> num_train_steps = (len(tokenized_wnut["train"]) // batch_size) * num_train_epochs
->>> optimizer, lr_schedule = create_optimizer(
-...     init_lr=2e-5,
-...     num_train_steps=num_train_steps,
-...     weight_decay_rate=0.01,
-...     num_warmup_steps=0,
-... )
-```
-
-ثم يمكنك تحميل DistilBERT مع [`TFAutoModelForTokenClassification`] إلى جانب عدد التسميات المتوقعة، وتخطيطات التسميات:
-
-```py
->>> from transformers import TFAutoModelForTokenClassification
-
->>> model = TFAutoModelForTokenClassification.from_pretrained(
-...     "distilbert/distilbert-base-uncased", num_labels=13, id2label=id2label, label2id=label2id
-... )
-```
-
-قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` مع [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> tf_train_set = model.prepare_tf_dataset(
-...     tokenized_wnut["train"],
-...     shuffle=True,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-
->>> tf_validation_set = model.prepare_tf_dataset(
-...     tokenized_wnut["validation"],
-...     shuffle=False,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-```
-
-هيّئ النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن نماذج Transformers تتضمن دالة خسارة افتراضية مرتبطة بالمهمة، لذلك لا تحتاج إلى تحديد واحدة إلا إذا كنت ترغب في ذلك:
-
-```py
->>> import tensorflow as tf
-
->>> model.compile(optimizer=optimizer)  # No loss argument!
-```
-
-آخر أمرين يجب إعدادهما قبل بدء التدريب هو حساب درجات seqeval من التنبؤات، وتوفير طريقة لدفع نموذجك إلى Hub. يتم ذلك باستخدام [Keras callbacks](../main_classes/keras_callbacks).
-
-مرر دالة `compute_metrics` الخاصة بك إلى [`~transformers.KerasMetricCallback`]:
-
-```py
->>> from transformers.keras_callbacks import KerasMetricCallback
-
->>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
-```
-
-حدد مكان دفع نموذجك والمحلل اللغوي في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> push_to_hub_callback = PushToHubCallback(
-...     output_dir="my_awesome_wnut_model",
-...     tokenizer=tokenizer,
-... )
-```
-
-ثم جمّع callbacks الخاصة بك معًا:
-
-```py
->>> callbacks = [metric_callback, push_to_hub_callback]
-```
-
-أخيرًا، أنت جاهز الآن لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع بيانات التدريب والتحقق، وعدد الحقبات، وcallbacks لتعديل النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)
-```
-
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-<Tip>
-
-للحصول على مثال أكثر تفصيلاً حول كيفية تعديل نموذج لتصنيف الرموز، ألق نظرة على الدفتر المقابل
-[دفتر PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb)
-أو [دفتر TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification-tf.ipynb).
-
-</Tip>
-
-## الاستدلال(Inference)
-
-رائع، الآن بعد أن قمت بتعديل نموذج، يمكنك استخدامه للاستدلال!
-
-احصل على بعض النصوص التي تريد تشغيل الاستدلال عليها:
-
-```py
->>> text = "The Golden State Warriors are an American professional basketball team based in San Francisco."
-```
-
-أبسط طريقة لتجربة نموذجك المُدرب مسبقًا للاستدلال هي استخدامه في [`pipeline`]. قم بتنفيذ `pipeline` لتصنيف الكيانات المسماة مع نموذجك، ومرر نصك إليه:
-
-```py
->>> from transformers import pipeline
-
->>> classifier = pipeline("ner", model="stevhliu/my_awesome_wnut_model")
->>> classifier(text)
-[{'entity': 'B-location',
-  'score': 0.42658573,
-  'index': 2,
-  'word': 'golden',
-  'start': 4,
-  'end': 10},
- {'entity': 'I-location',
-  'score': 0.35856336,
-  'index': 3,
-  'word': 'state',
-  'start': 11,
-  'end': 16},
- {'entity': 'B-group',
-  'score': 0.3064001,
-  'index': 4,
-  'word': 'warriors',
-  'start': 17,
-  'end': 25},
- {'entity': 'B-location',
-  'score': 0.65523505,
-  'index': 13,
-  'word': 'san',
-  'start': 80,
-  'end': 83},
- {'entity': 'B-location',
-  'score': 0.4668663,
-  'index': 14,
-  'word': 'francisco',
-  'start': 84,
-  'end': 93}]
-```
-
-يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
-
-<frameworkcontent>
-<pt>
-قسّم النص إلى رموز وأرجع المُوتّرات بلغة PyTorch:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_wnut_model")
->>> inputs = tokenizer(text, return_tensors="pt")
-```
-
-مرر مدخلاتك إلى النموذج واحصل على `logits`:
-
-```py
->>> from transformers import AutoModelForTokenClassification
-
->>> model = AutoModelForTokenClassification.from_pretrained("stevhliu/my_awesome_wnut_model")
->>> with torch.no_grad():
-...     logits = model(**inputs).logits
-```
-
-استخرج الفئة ذات الاحتمالية الأعلى، واستخدم جدول `id2label` الخاصة بالنموذج لتحويلها إلى تسمية نصية:
-
-```py
->>> predictions = torch.argmax(logits, dim=2)
->>> predicted_token_class = [model.config.id2label[t.item()] for t in predictions[0]]
->>> predicted_token_class
-['O',
- 'O',
- 'B-location',
- 'I-location',
- 'B-group',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'B-location',
- 'B-location',
- 'O',
- 'O']
-```
-</pt>
-<tf>
-قسّم النص إلى رموز وأرجع المُوتّرات ب TensorFlow:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_wnut_model")
->>> inputs = tokenizer(text, return_tensors="tf")
-```
-
-مرر مدخلاتك إلى النموذج واحصل على `logits`:
-
-```py
->>> from transformers import TFAutoModelForTokenClassification
-
->>> model = TFAutoModelForTokenClassification.from_pretrained("stevhliu/my_awesome_wnut_model")
->>> logits = model(**inputs).logits
-```
-
-استخرج الفئة ذات الاحتمالية الأعلى، واستخدم جدول `id2label` الخاصة بالنموذج لتحويلها إلى تسمية نصية:
-
-```py
->>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1)
->>> predicted_token_class = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()]
->>> predicted_token_class
-['O',
- 'O',
- 'B-location',
- 'I-location',
- 'B-group',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'O',
- 'B-location',
- 'B-location',
- 'O',
- 'O']
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tasks/translation.md
+++ b/docs/source/ar/tasks/translation.md
@ -1,407 +0,0 @@
-<!--Copyright 2022 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# الترجمة(Translation)
-
-[[open-in-colab]]
-
-<Youtube id="1JvfrvZgi6c"/>
-
-الترجمة هي عملية تحويل سلسلة نصية من لغة إلى أخرى. وهي إحدى المهام التي يمكن صياغتها كمسألة تسلسل إلى تسلسل، وهو إطار عمل قوي لإنتاج مخرجات من مدخلات، مثل الترجمة أو التلخيص. تُستخدم أنظمة الترجمة عادةً للترجمة بين نصوص لغات مختلفة، ويمكن استخدامها أيضًا لترجمة الكلام أو لمهام تجمع بين النصوص والكلام، مثل تحويل النص إلى كلام أو تحويل الكلام إلى نص.
-
-سيوضح لك هذا الدليل كيفية:
-
-1. ضبط دقيق لنموذج [T5](https://huggingface.co/google-t5/t5-small) على المجموعة الفرعية الإنجليزية-الفرنسية من مجموعة بيانات [OPUS Books](https://huggingface.co/datasets/opus_books) لترجمة النص الإنجليزي إلى الفرنسية.
-2. استخدام النموذج المضبوط بدقة للاستدلال.
-
-<Tip>
-
-لمشاهدة جميع البنى والنسخ المتوافقة مع هذه المهمة، نوصي بالتحقق من [صفحة المهمة](https://huggingface.co/tasks/translation).
-
-</Tip>
-
-قبل البدء، تأكد من تثبيت جميع المكتبات الضرورية:
-
-```bash
-pip install transformers datasets evaluate sacrebleu
-```
-
-نشجعك على تسجيل الدخول إلى حساب Hugging Face الخاص بك حتى تتمكن من تحميل نموذجك ومشاركته مع المجتمع. عند الطلب، أدخل الرمز المميز الخاص بك لتسجيل الدخول:
-
-```py
->>> from huggingface_hub import notebook_login
-
->>> notebook_login()
-```
-
-## تحميل مجموعة بيانات OPUS Books
-
-ابدأ بتحميل المجموعة الفرعية الإنجليزية-الفرنسية من مجموعة بيانات [OPUS Books](https://huggingface.co/datasets/opus_books) من مكتبة 🤗 Datasets:
-
-```py
->>> from datasets import load_dataset
-
->>> books = load_dataset("opus_books", "en-fr")
-```
-
-قسّم مجموعة البيانات إلى مجموعة تدريب ومجموعة اختبار باستخدام طريقة [`~datasets.Dataset.train_test_split`]:
-
-```py
->>> books = books["train"].train_test_split(test_size=0.2)
-```
-
-ثم ألقِ نظرة على مثال:
-
-```py
->>> books["train"][0]
-{'id': '90560',
- 'translation': {'en': 'But this lofty plateau measured only a few fathoms, and soon we reentered Our Element.',
-  'fr': 'Mais ce plateau élevé ne mesurait que quelques toises, et bientôt nous fûmes rentrés dans notre élément.'}}
-```
-
-`translation`: ترجمة إنجليزية وفرنسية للنص.
-
-## المعالجة المسبقة(Preprocess)
-
-<Youtube id="XAR8jnZZuUs"/>
-
-الخطوة التالية هي تحميل مُجزئ T5 لمعالجة أزواج اللغة الإنجليزية-الفرنسية:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> checkpoint = "google-t5/t5-small"
->>> tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-```
-
-يجب أن تقوم دالة المعالجة المسبقة التي تُريد إنشاءها بما يلي:
-
-1. إضافة بادئة إلى المُدخل بمُوجه حتى يعرف T5 أن هذه مهمة ترجمة. تتطلب بعض النماذج القادرة على أداء مهام متعددة توجيهًا لمهام مُحددة.
-2. تعيين اللغة الهدف (الفرنسية) في معامل `text_target` لضمان معالجة المُجزئ للنص بشكل صحيح. إذا لم تُعيّن `text_target`، فسيُعالج المُجزئ النص على أنه إنجليزي.
-3. اقتطاع التسلسلات بحيث لا يزيد طولها عن الحد الأقصى الذي يحدده معامل `max_length`.
-
-```py
->>> source_lang = "en"
->>> target_lang = "fr"
->>> prefix = "translate English to French: "
-
->>> def preprocess_function(examples):
-...     inputs = [prefix + example[source_lang] for example in examples["translation"]]
-...     targets = [example[target_lang] for example in examples["translation"]]
-...     model_inputs = tokenizer(inputs, text_target=targets, max_length=128, truncation=True)
-...     return model_inputs
-```
-
-لتطبيق دالة المعالجة المسبقة على مجموعة البيانات بأكملها، استخدم طريقة [`~datasets.Dataset.map`] من 🤗 Datasets. يمكنك تسريع دالة `map` عن طريق تعيين `batched=True` لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:
-
-```py
->>> tokenized_books = books.map(preprocess_function, batched=True)
-```
-
-الآن أنشئ دفعة من الأمثلة باستخدام [`DataCollatorForSeq2Seq`]. من الأكثر كفاءة *الحشو الديناميكي* للجمل إلى أطول طول في دفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الحد الأقصى للطول.
-
-<frameworkcontent>
-<pt>
-
-```py
->>> from transformers import DataCollatorForSeq2Seq
-
->>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)
-```
-</pt>
-<tf>
-
-```py
->>> from transformers import DataCollatorForSeq2Seq
-
->>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
-```
-</tf>
-</frameworkcontent>
-
-## التقييم (Evaluate)
-
-غالباً ما يكون تضمين مقياس أثناء التدريب مفيداً لتقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة باستخدام مكتبة 🤗 [Evaluate](https://huggingface.co/docs/evaluate/index). لهذه المهمة، حمّل مقياس [SacreBLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu) (راجع [الجولة السريعة](https://huggingface.co/docs/evaluate/a_quick_tour) لـ 🤗 Evaluate لمعرفة المزيد حول كيفية تحميل وحساب مقياس):
-
-```py
->>> import evaluate
-
->>> metric = evaluate.load("sacrebleu")
-```
-
-ثم أنشئ دالة تُمرر تنبؤاتك وتسمياتك إلى [`~evaluate.EvaluationModule.compute`] لحساب درجة SacreBLEU:
-
-```py
->>> import numpy as np
-
->>> def postprocess_text(preds, labels):
-...     preds = [pred.strip() for pred in preds]
-...     labels = [[label.strip()] for label in labels]
-
-...     return preds, labels
-
->>> def compute_metrics(eval_preds):
-...     preds, labels = eval_preds
-...     if isinstance(preds, tuple):
-...         preds = preds[0]
-...     decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
-
-...     labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
-...     decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
-
-...     decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
-
-...     result = metric.compute(predictions=decoded_preds, references=decoded_labels)
-...     result = {"bleu": result["score"]}
-
-...     prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
-...     result["gen_len"] = np.mean(prediction_lens)
-...     result = {k: round(v, 4) for k, v in result.items()}
-...     return result
-```
-
-دالة `compute_metrics` الخاصة بك جاهزة الآن، وسوف تعود إليها عند إعداد التدريب.
-
-## التدريب (Train)
-
-<frameworkcontent>
-<pt>
-
-<Tip>
-
-إذا لم تكن معتادًا على ضبط دقيق نموذج باستخدام [`Trainer`], فألقِ نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
-
-</Tip>
-
-أنت جاهز لبدء تدريب نموذجك الآن! حمّل T5 باستخدام [`AutoModelForSeq2SeqLM`]:
-
-```py
->>> from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
-
->>> model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-```
-
-في هذه المرحلة، تبقى ثلاث خطوات فقط:
-
-1. حدد مُعاملات للتدريب في [`Seq2SeqTrainingArguments`]. المُعامل الوحيدة المطلوبة هي `output_dir` التي تحدد مكان حفظ النموذج الخاص بك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين `push_to_hub=True` (يجب عليك تسجيل الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم [`Trainer`] بتقييم مقياس SacreBLEU وحفظ نقطة تدقيق التدريب.
-2. مرر مُعاملات التدريب إلى [`Seq2SeqTrainer`] جنبًا إلى جنب مع النموذج ومجموعة البيانات والمعالج اللغوي وجامع البيانات ووظيفة `compute_metrics`.
-3. نفّذ [`~Trainer.train`] لضبط نموذجك.
-
-```py
->>> training_args = Seq2SeqTrainingArguments(
-...     output_dir="my_awesome_opus_books_model",
-...     eval_strategy="epoch",
-...     learning_rate=2e-5,
-...     per_device_train_batch_size=16,
-...     per_device_eval_batch_size=16,
-...     weight_decay=0.01,
-...     save_total_limit=3,
-...     num_train_epochs=2,
-...     predict_with_generate=True,
-...     fp16=True, #change to bf16=True for XPU
-...     push_to_hub=True,
-... )
-
->>> trainer = Seq2SeqTrainer(
-...     model=model,
-...     args=training_args,
-...     train_dataset=tokenized_books["train"],
-...     eval_dataset=tokenized_books["test"],
-...     processing_class=tokenizer,
-...     data_collator=data_collator,
-...     compute_metrics=compute_metrics,
-... )
-
->>> trainer.train()
-```
-
-بمجرد اكتمال التدريب، شارك نموذجك مع Hub باستخدام طريقة [`~transformers.Trainer.push_to_hub`] حتى يتمكن الجميع من استخدام نموذجك:
-
-```py
->>> trainer.push_to_hub()
-```
-</pt>
-<tf>
-<Tip>
-
-إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
-
-</Tip>
-لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن وجدول معدل تعلم وبعض المعلمات الفائقة للتدريب:
-
-```py
->>> from transformers import AdamWeightDecay
-
->>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
-```
-
-ثم يمكنك تحميل T5 باستخدام [`TFAutoModelForSeq2SeqLM`]:
-
-```py
->>> from transformers import TFAutoModelForSeq2SeqLM
-
->>> model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
-```
-
-حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
-
-```py
->>> tf_train_set = model.prepare_tf_dataset(
-...     tokenized_books["train"],
-...     shuffle=True,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-
->>> tf_test_set = model.prepare_tf_dataset(
-...     tokenized_books["test"],
-...     shuffle=False,
-...     batch_size=16,
-...     collate_fn=data_collator,
-... )
-```
-
-قم بتكوين النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers تحتوي على دالة خسارة ذات صلة بالمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة إلا إذا كنت ترغب في ذلك:
-
-```py
->>> import tensorflow as tf
-
->>> model.compile(optimizer=optimizer)  # No loss argument!
-```
-
-آخر شيئين يجب إعدادهما قبل بدء التدريب هما حساب مقياس SacreBLEU من التوقعات، وتوفير طريقة لدفع نموذجك إلى Hub. يتم كلاهما باستخدام [استدعاءات Keras](../main_classes/keras_callbacks).
-
-مرر دالة `compute_metrics` الخاصة بك إلى [`~transformers.KerasMetricCallback`]:
-
-```py
->>> from transformers.keras_callbacks import KerasMetricCallback
-
->>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_test_set)
-```
-
-حدد مكان دفع نموذجك ومعالجك اللغوي في [`~transformers.PushToHubCallback`]:
-
-```py
->>> from transformers.keras_callbacks import PushToHubCallback
-
->>> push_to_hub_callback = PushToHubCallback(
-...     output_dir="my_awesome_opus_books_model",
-...     tokenizer=tokenizer,
-... )
-```
-
-ثم اجمع استدعاءاتك معًا:
-
-```py
->>> callbacks = [metric_callback, push_to_hub_callback]
-```
-
-أخيرًا، أنت جاهز لبدء تدريب نموذجك! اتصل بـ [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة وعدد الحقب واستدعاءاتك لضبط النموذج:
-
-```py
->>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=callbacks)
-```
-
-بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
-</tf>
-</frameworkcontent>
-
-<Tip>
-
-للحصول على مثال أكثر تعمقًا لكيفية ضبط نموذج للترجمة، ألق نظرة على [دفتر ملاحظات PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation.ipynb) المقابل
-أو [دفتر ملاحظات TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/translation-tf.ipynb).
-
-</Tip>
-
-## الاستدلال (Inference)
-
-رائع، الآن بعد أن قمت بضبط نموذج، يمكنك استخدامه للاستدلال!
-
-أحضر بعض النصوص التي ترغب في ترجمتها إلى لغة أخرى. بالنسبة لـ T5، تحتاج إلى إضافة بادئة إلى مدخلاتك اعتمادًا على المهمة التي تعمل عليها. للترجمة من الإنجليزية إلى الفرنسية، يجب عليك إضافة بادئة إلى مدخلاتك كما هو موضح أدناه:
-
-```py
->>> text = "translate English to French: Legumes share resources with nitrogen-fixing bacteria."
-```
-
-أبسط طريقة لتجربة نموذجك المضبوط للاستدلال هي استخدامه في [`pipeline`]. قم بإنشاء مثيل لـ `pipeline` للترجمة باستخدام نموذجك، ومرر النص الخاص بك إليه:
-
-```py
->>> from transformers import pipeline
-
-# تغيير `xx` إلى لغة الإدخال و `yy` إلى لغة المخرجات المطلوبة.
-# أمثلة: "en" للغة الإنجليزية، "fr" للغة الفرنسية، "de" للغة الألمانية، "es" للغة الإسبانية، "zh" للغة الصينية، إلخ؛ translation_en_to_fr تترجم من الإنجليزية إلى الفرنسية
-# يمكنك عرض جميع قوائم اللغات هنا - https://huggingface.co/languages
->>> translator = pipeline("translation_xx_to_yy", model="username/my_awesome_opus_books_model")
->>> translator(text)
-[{'translation_text': 'Legumes partagent des ressources avec des bactéries azotantes.'}]
-```
-
-يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
-
-<frameworkcontent>
-<pt>
-قم بتحويل النص إلى رموز وإرجاع `input_ids` كموترات PyTorch:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_opus_books_model")
->>> inputs = tokenizer(text, return_tensors="pt").input_ids
-```
-
-استخدم الدالة [`~generation.GenerationMixin.generate`] لإنشاء الترجمة. لمزيد من التفاصيل حول استراتيجيات توليد النصوص المختلفة والمعلمات للتحكم في التوليد، تحقق من واجهة برمجة تطبيقات [توليد النصوص](../main_classes/text_generation).
-
-```py
->>> from transformers import AutoModelForSeq2SeqLM
-
->>> model = AutoModelForSeq2SeqLM.from_pretrained("username/my_awesome_opus_books_model")
->>> outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
-```
-
-فك تشفير معرفات الرموز المولدة مرة أخرى إلى نص:
-
-```py
->>> tokenizer.decode(outputs[0], skip_special_tokens=True)
-'Les lignées partagent des ressources avec des bactéries enfixant l'azote.'
-```
-</pt>
-<tf>
-قم بتحويل النص إلى رموز وإرجاع `input_ids` كموترات TensorFlow:
-
-```py
->>> from transformers import AutoTokenizer
-
->>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_opus_books_model")
->>> inputs = tokenizer(text, return_tensors="tf").input_ids
-```
-
-استخدم طريقة [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] لإنشاء الترجمة. لمزيد من التفاصيل حول استراتيجيات توليد النصوص المختلفة والمعلمات للتحكم في التوليد، تحقق من واجهة برمجة تطبيقات [توليد النصوص](../main_classes/text_generation).
-
-```py
->>> from transformers import TFAutoModelForSeq2SeqLM
-
->>> model = TFAutoModelForSeq2SeqLM.from_pretrained("username/my_awesome_opus_books_model")
->>> outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
-```
-
-فك تشفير معرفات الرموز المولدة مرة أخرى إلى نص:
-
-```py
->>> tokenizer.decode(outputs[0], skip_special_tokens=True)
-'Les lugumes partagent les ressources avec des bactéries fixatrices d'azote.'
-```
-</tf>
-</frameworkcontent>
--- a/docs/source/ar/tiktoken.md
+++ b/docs/source/ar/tiktoken.md
@ -1,41 +0,0 @@
-# Tiktoken والتفاعل مع Transformers
-
-يتم دمج دعم ملفات نموذج tiktoken بسلاسة في 🤗 transformers عند تحميل النماذج
-`from_pretrained` مع ملف `tokenizer.model` tiktoken على Hub، والذي يتم تحويله تلقائيًا إلى [المحلل اللغوي السريع](https://huggingface.co/docs/transformers/main/en/main_classes/tokenizer#transformers.PreTrainedTokenizerFast).
-
-### النماذج المعروفة التي تم إصدارها مع `tiktoken.model`:
-	- gpt2
-	- llama3
-
-## مثال على الاستخدام
-
-من أجل تحميل ملفات `tiktoken` في `transformers`، تأكد من أن ملف `tokenizer.model` هو ملف tiktoken وسيتم تحميله تلقائيًا عند التحميل `from_pretrained`. إليك كيفية تحميل مجزىء لغوي ونموذج، والذي
-يمكن تحميله من نفس الملف بالضبط:
-
-```py
-from transformers import AutoTokenizer
-
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id, subfolder="original")
-```
-## إنشاء مجزىء لغوي tiktoken
-
-لا يحتوي ملف `tokenizer.model` على أي معلومات حول الرموز أو الأنماط الإضافية. إذا كانت هذه الأمور مهمة، قم بتحويل المحلل اللغوي إلى `tokenizer.json`، وهو التنسيق المناسب لـ [`PreTrainedTokenizerFast`].
-
-قم بتوليد ملف `tokenizer.model` باستخدام [tiktoken.get_encoding](https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/tiktoken/registry.py#L63) ثم قم بتحويله إلى `tokenizer.json` باستخدام [`convert_tiktoken_to_fast`].
-
-```py
-
-from transformers.integrations.tiktoken import convert_tiktoken_to_fast
-from tiktoken import get_encoding
-
-# يمكنك تحميل ترميزك المخصص أو الترميز الذي توفره OpenAI
-encoding = get_encoding("gpt2")
-convert_tiktoken_to_fast(encoding, "config/save/dir")
-```
-
-يتم حفظ ملف `tokenizer.json` الناتج في الدليل المحدد ويمكن تحميله باستخدام [`PreTrainedTokenizerFast`].
-
-```py
-tokenizer = PreTrainedTokenizerFast.from_pretrained("config/save/dir")
-```
--- a/docs/source/de/quicktour.md
+++ b/docs/source/de/quicktour.md
@ -109,7 +109,7 @@ label: NEGATIVE, with score: 0.5309
 Die [`pipeline`] kann auch über einen ganzen Datensatz iterieren. Starten wir mit der Installation der [🤗 Datasets](https://huggingface.co/docs/datasets/) Bibliothek:

 ```bash
-pip install datasets
+pip install datasets 
 ```

 Erstellen wir eine [`pipeline`] mit der Aufgabe die wir lösen und dem Modell welches wir nutzen möchten.
@ -191,7 +191,7 @@ Wenn Sie kein Modell für Ihren Anwendungsfall finden können, müssen Sie ein v

 <Youtube id="AhChOFRegn4"/>

-Unter der Haube arbeiten die Klassen [`AutoModelForSequenceClassification`] und [`AutoTokenizer`] zusammen, um die [`pipeline`] zu betreiben. Eine [`AutoClass`](./model_doc/auto) ist eine Abkürzung, die automatisch die Architektur eines trainierten Modells aus dessen Namen oder Pfad abruft. Sie müssen nur die passende `AutoClass` für Ihre Aufgabe und den zugehörigen Tokenizer mit [`AutoTokenizer`] auswählen.
+Unter der Haube arbeiten die Klassen [`AutoModelForSequenceClassification`] und [`AutoTokenizer`] zusammen, um die [`pipeline`] zu betreiben. Eine [`AutoClass`](./model_doc/auto) ist eine Abkürzung, die automatisch die Architektur eines trainierten Modells aus dessen Namen oder Pfad abruft. Sie müssen nur die passende `AutoClass` für Ihre Aufgabe und den zugehörigen Tokenizer mit [`AutoTokenizer`] auswählen. 

 Kehren wir zu unserem Beispiel zurück und sehen wir uns an, wie Sie die `AutoClass` verwenden können, um die Ergebnisse der [`pipeline`] zu replizieren.

@ -281,7 +281,7 @@ Jetzt können Sie Ihren vorverarbeiteten Stapel von Eingaben direkt an das Model
 ```

 Das Modell gibt die endgültigen Aktivierungen in dem Attribut "logits" aus. Wenden Sie die Softmax-Funktion auf die "logits" an, um die Wahrscheinlichkeiten zu erhalten:
-
+  
 ```py
 >>> from torch import nn

@ -308,7 +308,7 @@ In der [Aufgabenzusammenfassung](./task_summary) steht, welche [AutoModel]-Klass
 </Tip>

 Jetzt können Sie Ihren vorverarbeiteten Stapel von Eingaben direkt an das Modell übergeben, indem Sie die Wörterbuchschlüssel direkt an die Tensoren übergeben:
-
+  
 ```py
 >>> tf_outputs = tf_model(tf_batch)
 ```
@ -383,8 +383,8 @@ Ein besonders cooles 🤗 Transformers-Feature ist die Möglichkeit, ein Modell
 ```py
 >>> from transformers import AutoModel

->>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
->>> pt_model = AutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
+>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
+>>> pt_model = AutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
 ```
 </pt>
 <tf>
@ -392,8 +392,8 @@ Ein besonders cooles 🤗 Transformers-Feature ist die Möglichkeit, ein Modell
 ```py
 >>> from transformers import TFAutoModel

->>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
->>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
+>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
+>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
 ```
 </tf>
 </frameworkcontent>
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@ -110,17 +110,6 @@
    - local: kv_cache
      title: Best Practices for Generation with Cache
    title: Generation
-  - isExpanded: false
-    sections:
-    - local: chat_template_basics
-      title: Getting Started with Chat Templates for Text LLMs
-    - local: chat_template_multimodal
-      title: Multimodal Chat Templates for Vision and Audio LLMs
-    - local: chat_template_tools_and_documents
-      title: Expanding Chat Templates with Tools and Documents
-    - local: chat_template_advanced
-      title: Advanced Usage and Customizing Your Chat Templates
-    title: Chat Templates
  - isExpanded: false
    sections:
    - local: tasks/idefics
@ -138,6 +127,8 @@
    title: Use model-specific APIs
  - local: custom_models
    title: Share a custom model
+  - local: chat_templating
+    title: Chat templates
  - local: trainer
    title: Trainer
  - local: sagemaker
@ -148,6 +139,8 @@
    title: Export to TFLite
  - local: torchscript
    title: Export to TorchScript
+  - local: benchmarks
+    title: Benchmarks
  - local: notebooks
    title: Notebooks with examples
  - local: community
@ -174,16 +167,10 @@
    title: AWQ
  - local: quantization/aqlm
    title: AQLM
-  - local: quantization/vptq
-    title: SpQR
-  - local: quantization/spqr
-    title: VPTQ
  - local: quantization/quanto
    title: Quanto
  - local: quantization/eetq
    title: EETQ
-  - local: quantization/higgs
-    title: HIGGS
  - local: quantization/hqq
    title: HQQ
  - local: quantization/fbgemm_fp8
@ -196,8 +183,6 @@
    title: BitNet
  - local: quantization/compressed_tensors
    title: compressed-tensors
-  - local: quantization/finegrained_fp8
-    title: Fine-grained FP8
  - local: quantization/contribute
    title: Contribute new quantization method
  title: Quantization Methods
@ -337,8 +322,6 @@
      sections:
      - local: model_doc/albert
        title: ALBERT
-      - local: model_doc/bamba
-        title: Bamba
      - local: model_doc/bart
        title: BART
      - local: model_doc/barthez
@ -379,8 +362,6 @@
        title: CodeLlama
      - local: model_doc/cohere
        title: Cohere
-      - local: model_doc/cohere2
-        title: Cohere2
      - local: model_doc/convbert
        title: ConvBERT
      - local: model_doc/cpm
@ -397,8 +378,6 @@
        title: DeBERTa-v2
      - local: model_doc/dialogpt
        title: DialoGPT
-      - local: model_doc/diffllama
-        title: DiffLlama
      - local: model_doc/distilbert
        title: DistilBERT
      - local: model_doc/dpr
@ -415,10 +394,10 @@
        title: ESM
      - local: model_doc/falcon
        title: Falcon
-      - local: model_doc/falcon3
-        title: Falcon3
      - local: model_doc/falcon_mamba
        title: FalconMamba
+      - local: model_doc/fastspeech2_conformer
+        title: FastSpeech2Conformer
      - local: model_doc/flan-t5
        title: FLAN-T5
      - local: model_doc/flan-ul2
@ -461,12 +440,6 @@
        title: Granite
      - local: model_doc/granitemoe
        title: GraniteMoe
-      - local: model_doc/granitemoeshared
-        title: GraniteMoeShared
-      - local: model_doc/granitevision
-        title: GraniteVision
-      - local: model_doc/helium
-        title: Helium
      - local: model_doc/herbert
        title: HerBERT
      - local: model_doc/ibert
@ -519,8 +492,6 @@
        title: mLUKE
      - local: model_doc/mobilebert
        title: MobileBERT
-      - local: model_doc/modernbert
-        title: ModernBert
      - local: model_doc/mpnet
        title: MPNet
      - local: model_doc/mpt
@ -641,8 +612,6 @@
        title: YOSO
      - local: model_doc/zamba
        title: Zamba
-      - local: model_doc/zamba2
-        title: Zamba2
      title: Text models
    - isExpanded: false
      sections:
@ -658,8 +627,6 @@
        title: ConvNeXTV2
      - local: model_doc/cvt
        title: CvT
-      - local: model_doc/dab-detr
-        title: DAB-DETR
      - local: model_doc/deformable_detr
        title: Deformable DETR
      - local: model_doc/deit
@ -668,8 +635,6 @@
        title: Depth Anything
      - local: model_doc/depth_anything_v2
        title: Depth Anything V2
-      - local: model_doc/depth_pro
-        title: DepthPro
      - local: model_doc/deta
        title: DETA
      - local: model_doc/detr
@ -678,8 +643,6 @@
        title: DiNAT
      - local: model_doc/dinov2
        title: DINOV2
-      - local: model_doc/dinov2_with_registers
-        title: DINOv2 with Registers
      - local: model_doc/dit
        title: DiT
      - local: model_doc/dpt
@ -726,14 +689,10 @@
        title: ResNet
      - local: model_doc/rt_detr
        title: RT-DETR
-      - local: model_doc/rt_detr_v2
-        title: RT-DETRv2
      - local: model_doc/segformer
        title: SegFormer
      - local: model_doc/seggpt
        title: SegGpt
-      - local: model_doc/superglue
-        title: SuperGlue
      - local: model_doc/superpoint
        title: SuperPoint
      - local: model_doc/swiftformer
@ -746,10 +705,6 @@
        title: Swin2SR
      - local: model_doc/table-transformer
        title: Table Transformer
-      - local: model_doc/textnet
-        title: TextNet
-      - local: model_doc/timm_wrapper
-        title: Timm Wrapper
      - local: model_doc/upernet
        title: UperNet
      - local: model_doc/van
@ -766,8 +721,6 @@
        title: ViTMatte
      - local: model_doc/vit_msn
        title: ViTMSN
-      - local: model_doc/vitpose
-        title: ViTPose
      - local: model_doc/yolos
        title: YOLOS
      - local: model_doc/zoedepth
@ -785,8 +738,8 @@
        title: dac
      - local: model_doc/encodec
        title: EnCodec
-      - local: model_doc/fastspeech2_conformer
-        title: FastSpeech2Conformer
+      - local: model_doc/hiera
+        title: Hiera
      - local: model_doc/hubert
        title: Hubert
      - local: model_doc/mctct
@ -795,8 +748,6 @@
        title: Mimi
      - local: model_doc/mms
        title: MMS
-      - local: model_doc/moonshine
-        title: Moonshine
      - local: model_doc/moshi
        title: Moshi
      - local: model_doc/musicgen
@ -859,8 +810,6 @@
        title: ALIGN
      - local: model_doc/altclip
        title: AltCLIP
-      - local: model_doc/aria
-        title: Aria
      - local: model_doc/blip
        title: BLIP
      - local: model_doc/blip-2
@ -879,22 +828,16 @@
        title: CLIPSeg
      - local: model_doc/clvp
        title: CLVP
-      - local: model_doc/colpali
-        title: ColPali
      - local: model_doc/data2vec
        title: Data2Vec
      - local: model_doc/deplot
        title: DePlot
      - local: model_doc/donut
        title: Donut
-      - local: model_doc/emu3
-        title: Emu3
      - local: model_doc/flava
        title: FLAVA
      - local: model_doc/git
        title: GIT
-      - local: model_doc/got_ocr2
-        title: GOT-OCR2
      - local: model_doc/grounding-dino
        title: Grounding DINO
      - local: model_doc/groupvit
@ -955,8 +898,6 @@
        title: Pix2Struct
      - local: model_doc/pixtral
        title: Pixtral
-      - local: model_doc/qwen2_5_vl
-        title: Qwen2.5-VL
      - local: model_doc/qwen2_audio
        title: Qwen2Audio
      - local: model_doc/qwen2_vl
--- a/docs/source/en/add_new_pipeline.md
+++ b/docs/source/en/add_new_pipeline.md
@ -184,7 +184,7 @@ class PairClassificationPipeline(Pipeline):
 ```

 The implementation is framework agnostic, and will work for PyTorch and TensorFlow models. If we have saved this in
-a file named `pair_classification.py`, we can then import it and register it like this.
+a file named `pair_classification.py`, we can then import it and register it like this:

 ```py
 from pair_classification import PairClassificationPipeline
@ -199,22 +199,6 @@ PIPELINE_REGISTRY.register_pipeline(
 )
 ```

-The [register_pipeline](https://github.com/huggingface/transformers/blob/9feae5fb0164e89d4998e5776897c16f7330d3df/src/transformers/pipelines/base.py#L1387) function registers the pipeline details (task type, pipeline class, supported backends) to a models `config.json` file.
-
-```json
-  "custom_pipelines": {
-    "pair-classification": {
-      "impl": "pair_classification.PairClassificationPipeline",
-      "pt": [
-        "AutoModelForSequenceClassification"
-      ],
-      "tf": [
-        "TFAutoModelForSequenceClassification"
-      ],
-    }
-  },
-```
-
 Once this is done, we can use it with a pretrained model. For instance `sgugger/finetuned-bert-mrpc` has been
 fine-tuned on the MRPC dataset, which classifies pairs of sentences as paraphrases or not.

--- a/docs/source/en/agents_advanced.md
+++ b/docs/source/en/agents_advanced.md
@ -162,7 +162,7 @@ agent.run(
 improved_prompt could be "A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background"

 Now that I have improved the prompt, I can use the image generator tool to generate an image based on this prompt.
-=== Agent is executing the code below:
+>>> Agent is executing the code below:
 image = image_generator(prompt="A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background")
 final_answer(image)
 ```
--- a/docs/source/en/benchmarks.md
+++ b/docs/source/en/benchmarks.md
@ -0,0 +1,387 @@
+<!--Copyright 2020 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# Benchmarks
+
+<Tip warning={true}>
+
+Hugging Face's Benchmarking tools are deprecated and it is advised to use external Benchmarking libraries to measure the speed 
+and memory complexity of Transformer models.
+
+</Tip>
+
+[[open-in-colab]]
+
+Let's take a look at how 🤗 Transformers models can be benchmarked, best practices, and already available benchmarks.
+
+A notebook explaining in more detail how to benchmark 🤗 Transformers models can be found [here](https://github.com/huggingface/notebooks/tree/main/examples/benchmark.ipynb).
+
+## How to benchmark 🤗 Transformers models
+
+The classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] allow to flexibly benchmark 🤗 Transformers models. The benchmark classes allow us to measure the _peak memory usage_ and _required time_ for both _inference_ and _training_.
+
+<Tip>
+
+Here, _inference_ is defined by a single forward pass, and _training_ is defined by a single forward pass and
+backward pass.
+
+</Tip>
+
+The benchmark classes [`PyTorchBenchmark`] and [`TensorFlowBenchmark`] expect an object of type [`PyTorchBenchmarkArguments`] and
+[`TensorFlowBenchmarkArguments`], respectively, for instantiation. [`PyTorchBenchmarkArguments`] and [`TensorFlowBenchmarkArguments`] are data classes and contain all relevant configurations for their corresponding benchmark class. In the following example, it is shown how a BERT model of type _bert-base-cased_ can be benchmarked.
+
+<frameworkcontent>
+<pt>
+```py
+>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
+
+>>> args = PyTorchBenchmarkArguments(models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512])
+>>> benchmark = PyTorchBenchmark(args)
+```
+</pt>
+<tf>
+```py
+>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments
+
+>>> args = TensorFlowBenchmarkArguments(
+...     models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
+... )
+>>> benchmark = TensorFlowBenchmark(args)
+```
+</tf>
+</frameworkcontent>
+
+Here, three arguments are given to the benchmark argument data classes, namely `models`, `batch_sizes`, and
+`sequence_lengths`. The argument `models` is required and expects a `list` of model identifiers from the
+[model hub](https://huggingface.co/models) The `list` arguments `batch_sizes` and `sequence_lengths` define
+the size of the `input_ids` on which the model is benchmarked. There are many more parameters that can be configured
+via the benchmark argument data classes. For more detail on these one can either directly consult the files
+`src/transformers/benchmark/benchmark_args_utils.py`, `src/transformers/benchmark/benchmark_args.py` (for PyTorch)
+and `src/transformers/benchmark/benchmark_args_tf.py` (for Tensorflow). Alternatively, running the following shell
+commands from root will print out a descriptive list of all configurable parameters for PyTorch and Tensorflow
+respectively.
+
+<frameworkcontent>
+<pt>
+```bash
+python examples/pytorch/benchmarking/run_benchmark.py --help
+```
+
+An instantiated benchmark object can then simply be run by calling `benchmark.run()`.
+
+```py
+>>> results = benchmark.run()
+>>> print(results)
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length     Time in s                  
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             0.006     
+google-bert/bert-base-uncased          8               32            0.006     
+google-bert/bert-base-uncased          8              128            0.018     
+google-bert/bert-base-uncased          8              512            0.088     
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length    Memory in MB 
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             1227
+google-bert/bert-base-uncased          8               32            1281
+google-bert/bert-base-uncased          8              128            1307
+google-bert/bert-base-uncased          8              512            1539
+--------------------------------------------------------------------------------
+
+====================        ENVIRONMENT INFORMATION         ====================
+
+- transformers_version: 2.11.0
+- framework: PyTorch
+- use_torchscript: False
+- framework_version: 1.4.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 08:58:43.371351
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</pt>
+<tf>
+```bash
+python examples/tensorflow/benchmarking/run_benchmark_tf.py --help
+```
+
+An instantiated benchmark object can then simply be run by calling `benchmark.run()`.
+
+```py
+>>> results = benchmark.run()
+>>> print(results)
+>>> results = benchmark.run()
+>>> print(results)
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length     Time in s                  
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             0.005
+google-bert/bert-base-uncased          8               32            0.008
+google-bert/bert-base-uncased          8              128            0.022
+google-bert/bert-base-uncased          8              512            0.105
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length    Memory in MB 
+--------------------------------------------------------------------------------
+google-bert/bert-base-uncased          8               8             1330
+google-bert/bert-base-uncased          8               32            1330
+google-bert/bert-base-uncased          8              128            1330
+google-bert/bert-base-uncased          8              512            1770
+--------------------------------------------------------------------------------
+
+====================        ENVIRONMENT INFORMATION         ====================
+
+- transformers_version: 2.11.0
+- framework: Tensorflow
+- use_xla: False
+- framework_version: 2.2.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 09:26:35.617317
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</tf>
+</frameworkcontent>
+
+By default, the _time_ and the _required memory_ for _inference_ are benchmarked. In the example output above the first
+two sections show the result corresponding to _inference time_ and _inference memory_. In addition, all relevant
+information about the computing environment, _e.g._ the GPU type, the system, the library versions, etc... are printed
+out in the third section under _ENVIRONMENT INFORMATION_. This information can optionally be saved in a _.csv_ file
+when adding the argument `save_to_csv=True` to [`PyTorchBenchmarkArguments`] and
+[`TensorFlowBenchmarkArguments`] respectively. In this case, every section is saved in a separate
+_.csv_ file. The path to each _.csv_ file can optionally be defined via the argument data classes.
+
+Instead of benchmarking pre-trained models via their model identifier, _e.g._ `google-bert/bert-base-uncased`, the user can
+alternatively benchmark an arbitrary configuration of any available model class. In this case, a `list` of
+configurations must be inserted with the benchmark args as follows.
+
+<frameworkcontent>
+<pt>
+```py
+>>> from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments, BertConfig
+
+>>> args = PyTorchBenchmarkArguments(
+...     models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
+... )
+>>> config_base = BertConfig()
+>>> config_384_hid = BertConfig(hidden_size=384)
+>>> config_6_lay = BertConfig(num_hidden_layers=6)
+
+>>> benchmark = PyTorchBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
+>>> benchmark.run()
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length       Time in s                  
+--------------------------------------------------------------------------------
+bert-base                  8              128            0.006
+bert-base                  8              512            0.006
+bert-base                  8              128            0.018     
+bert-base                  8              512            0.088     
+bert-384-hid              8               8             0.006     
+bert-384-hid              8               32            0.006     
+bert-384-hid              8              128            0.011     
+bert-384-hid              8              512            0.054     
+bert-6-lay                 8               8             0.003     
+bert-6-lay                 8               32            0.004     
+bert-6-lay                 8              128            0.009     
+bert-6-lay                 8              512            0.044
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length      Memory in MB 
+--------------------------------------------------------------------------------
+bert-base                  8               8             1277
+bert-base                  8               32            1281
+bert-base                  8              128            1307     
+bert-base                  8              512            1539     
+bert-384-hid              8               8             1005     
+bert-384-hid              8               32            1027     
+bert-384-hid              8              128            1035     
+bert-384-hid              8              512            1255     
+bert-6-lay                 8               8             1097     
+bert-6-lay                 8               32            1101     
+bert-6-lay                 8              128            1127     
+bert-6-lay                 8              512            1359
+--------------------------------------------------------------------------------
+
+====================        ENVIRONMENT INFORMATION         ====================
+
+- transformers_version: 2.11.0
+- framework: PyTorch
+- use_torchscript: False
+- framework_version: 1.4.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 09:35:25.143267
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</pt>
+<tf>
+```py
+>>> from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig
+
+>>> args = TensorFlowBenchmarkArguments(
+...     models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]
+... )
+>>> config_base = BertConfig()
+>>> config_384_hid = BertConfig(hidden_size=384)
+>>> config_6_lay = BertConfig(num_hidden_layers=6)
+
+>>> benchmark = TensorFlowBenchmark(args, configs=[config_base, config_384_hid, config_6_lay])
+>>> benchmark.run()
+====================       INFERENCE - SPEED - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length       Time in s                  
+--------------------------------------------------------------------------------
+bert-base                  8               8             0.005
+bert-base                  8               32            0.008
+bert-base                  8              128            0.022
+bert-base                  8              512            0.106
+bert-384-hid              8               8             0.005
+bert-384-hid              8               32            0.007
+bert-384-hid              8              128            0.018
+bert-384-hid              8              512            0.064
+bert-6-lay                 8               8             0.002
+bert-6-lay                 8               32            0.003
+bert-6-lay                 8              128            0.0011
+bert-6-lay                 8              512            0.074
+--------------------------------------------------------------------------------
+
+====================      INFERENCE - MEMORY - RESULT       ====================
+--------------------------------------------------------------------------------
+Model Name             Batch Size     Seq Length      Memory in MB 
+--------------------------------------------------------------------------------
+bert-base                  8               8             1330
+bert-base                  8               32            1330
+bert-base                  8              128            1330
+bert-base                  8              512            1770
+bert-384-hid              8               8             1330
+bert-384-hid              8               32            1330
+bert-384-hid              8              128            1330
+bert-384-hid              8              512            1540
+bert-6-lay                 8               8             1330
+bert-6-lay                 8               32            1330
+bert-6-lay                 8              128            1330
+bert-6-lay                 8              512            1540
+--------------------------------------------------------------------------------
+
+====================        ENVIRONMENT INFORMATION         ====================
+
+- transformers_version: 2.11.0
+- framework: Tensorflow
+- use_xla: False
+- framework_version: 2.2.0
+- python_version: 3.6.10
+- system: Linux
+- cpu: x86_64
+- architecture: 64bit
+- date: 2020-06-29
+- time: 09:38:15.487125
+- fp16: False
+- use_multiprocessing: True
+- only_pretrain_model: False
+- cpu_ram_mb: 32088
+- use_gpu: True
+- num_gpus: 1
+- gpu: TITAN RTX
+- gpu_ram_mb: 24217
+- gpu_power_watts: 280.0
+- gpu_performance_state: 2
+- use_tpu: False
+```
+</tf>
+</frameworkcontent>
+
+Again, _inference time_ and _required memory_ for _inference_ are measured, but this time for customized configurations
+of the `BertModel` class. This feature can especially be helpful when deciding for which configuration the model
+should be trained.
+
+
+## Benchmark best practices
+
+This section lists a couple of best practices one should be aware of when benchmarking a model.
+
+- Currently, only single device benchmarking is supported. When benchmarking on GPU, it is recommended that the user
+  specifies on which device the code should be run by setting the `CUDA_VISIBLE_DEVICES` environment variable in the
+  shell, _e.g._ `export CUDA_VISIBLE_DEVICES=0` before running the code.
+- The option `no_multi_processing` should only be set to `True` for testing and debugging. To ensure accurate
+  memory measurement it is recommended to run each memory benchmark in a separate process by making sure
+  `no_multi_processing` is set to `True`.
+- One should always state the environment information when sharing the results of a model benchmark. Results can vary
+  heavily between different GPU devices, library versions, etc., as a consequence, benchmark results on their own are not very
+  useful for the community.
+
+
+## Sharing your benchmark
+
+Previously all available core models (10 at the time) have been benchmarked for _inference time_, across many different
+settings: using PyTorch, with and without TorchScript, using TensorFlow, with and without XLA. All of those tests were
+done across CPUs (except for TensorFlow XLA) and GPUs.
+
+The approach is detailed in the [following blogpost](https://medium.com/huggingface/benchmarking-transformers-pytorch-and-tensorflow-e2917fb891c2) and the results are
+available [here](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing).
+
+With the new _benchmark_ tools, it is easier than ever to share your benchmark results with the community
+
+- [PyTorch Benchmarking Results](https://github.com/huggingface/transformers/tree/main/examples/pytorch/benchmarking/README.md).
+- [TensorFlow Benchmarking Results](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/benchmarking/README.md).
--- a/docs/source/en/chat_template_advanced.md
+++ b/docs/source/en/chat_template_advanced.md
@ -1,463 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Advanced Usage and Customizing Your Chat Templates
-
-In this page, we’ll explore more advanced techniques for working with chat templates in Transformers. Whether you’re looking to write your own templates, create custom components, or optimize your templates for efficiency, we’ll cover everything you need to take your templates to the next level. Let’s dive into the tools and strategies that will help you get the most out of your chat models.
-
-
-## How do chat templates work?
-
-The chat template for a model is stored on the `tokenizer.chat_template` attribute. Let's take a look at a `Zephyr` chat template, though note this
-one is a little simplified from the actual one!
-
-```
-{%- for message in messages %}
-    {{- '<|' + message['role'] + '|>\n' }}
-    {{- message['content'] + eos_token }}
-{%- endfor %}
-{%- if add_generation_prompt %}
-    {{- '<|assistant|>\n' }}
-{%- endif %}
-```
-
-If you've never seen one of these before, this is a [Jinja template](https://jinja.palletsprojects.com/en/3.1.x/templates/).
-Jinja is a templating language that allows you to write simple code that generates text. In many ways, the code and
-syntax resembles Python. In pure Python, this template would look something like this:
-
-```python
-for message in messages:
-    print(f'<|{message["role"]}|>')
-    print(message['content'] + eos_token)
-if add_generation_prompt:
-    print('<|assistant|>')
-```
-
-Effectively, the template does three things:
-1. For each message, print the role enclosed in `<|` and `|>`, like `<|user|>` or `<|assistant|>`.
-2. Next, print the content of the message, followed by the end-of-sequence token.
-3. Finally, if `add_generation_prompt` is set, print the assistant token, so that the model knows to start generating
-   an assistant response.
-
-This is a pretty simple template but Jinja gives you a lot of flexibility to do more complex things! Let's see a Jinja
-template that can format inputs similarly to the way LLaMA formats them (note that the real LLaMA template includes 
-handling for default system messages and slightly different system message handling in general - don't use this one 
-in your actual code!)
-
-```
-{%- for message in messages %}
-    {%- if message['role'] == 'user' %}
-        {{- bos_token + '[INST] ' + message['content'] + ' [/INST]' }}
-    {%- elif message['role'] == 'system' %}
-        {{- '<<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}
-    {%- elif message['role'] == 'assistant' %}
-        {{- ' '  + message['content'] + ' ' + eos_token }}
-    {%- endif %}
-{%- endfor %}
-```
-
-Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens like
-`[INST]` and `[/INST]` based on the role of each message. User, assistant and system messages are clearly
-distinguishable to the model because of the tokens they're wrapped in.
-
-
-## How do I create a chat template?
-
-Simple, just write a jinja template and set `tokenizer.chat_template`. You may find it easier to start with an 
-existing template from another model and simply edit it for your needs! For example, we could take the LLaMA template
-above and add "[ASST]" and "[/ASST]" to assistant messages:
-
-```
-{%- for message in messages %}
-    {%- if message['role'] == 'user' %}
-        {{- bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}
-    {%- elif message['role'] == 'system' %}
-        {{- '<<SYS>>\\n' + message['content'].strip() + '\\n<</SYS>>\\n\\n' }}
-    {%- elif message['role'] == 'assistant' %}
-        {{- '[ASST] '  + message['content'] + ' [/ASST]' + eos_token }}
-    {%- endif %}
-{%- endfor %}
-```
-
-Now, simply set the `tokenizer.chat_template` attribute. Next time you use [`~PreTrainedTokenizer.apply_chat_template`], it will
-use your new template! This attribute will be saved in the `tokenizer_config.json` file, so you can use
-[`~utils.PushToHubMixin.push_to_hub`] to upload your new template to the Hub and make sure everyone's using the right
-template for your model!
-
-```python
-template = tokenizer.chat_template
-template = template.replace("SYS", "SYSTEM")  # Change the system token
-tokenizer.chat_template = template  # Set the new template
-tokenizer.push_to_hub("model_name")  # Upload your new template to the Hub!
-```
-
-The method [`~PreTrainedTokenizer.apply_chat_template`] which uses your chat template is called by the [`TextGenerationPipeline`] class, so 
-once you set the correct chat template, your model will automatically become compatible with [`TextGenerationPipeline`].
-
-<Tip>
-If you're fine-tuning a model for chat, in addition to setting a chat template, you should probably add any new chat
-control tokens as special tokens in the tokenizer. Special tokens are never split, 
-ensuring that your control tokens are always handled as single tokens rather than being tokenized in pieces. You 
-should also set the tokenizer's `eos_token` attribute to the token that marks the end of assistant generations in your
-template. This will ensure that text generation tools can correctly figure out when to stop generating text.
-</Tip>
-
-
-## Why do some models have multiple templates?
-
-Some models use different templates for different use cases. For example, they might use one template for normal chat
-and another for tool-use, or retrieval-augmented generation. In these cases, `tokenizer.chat_template` is a dictionary.
-This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use
-Jinja statements like `if tools is defined` and `{% macro %}` definitions to easily wrap multiple code paths in a
-single template.
-
-When a tokenizer has multiple templates, `tokenizer.chat_template` will be a `dict`, where each key is the name
-of a template. The `apply_chat_template` method has special handling for certain template names: Specifically, it will
-look for a template named `default` in most cases, and will raise an error if it can't find one. However, if a template
-named `tool_use` exists when the user has passed a `tools` argument, it will use that instead. To access templates
-with other names, pass the name of the template you want to the `chat_template` argument of
-`apply_chat_template()`.
-
-We find that this can be a bit confusing for users, though - so if you're writing a template yourself, we recommend
-trying to put it all in a single template where possible!
-
-
-## What template should I use?
-
-When setting the template for a model that's already been trained for chat, you should ensure that the template
-exactly matches the message formatting that the model saw during training, or else you will probably experience
-performance degradation. This is true even if you're training the model further - you will probably get the best 
-performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the
-best performance for inference or fine-tuning when you precisely match the tokenization used during training.
-
-If you're training a model from scratch, or fine-tuning a base language model for chat, on the other hand,
-you have a lot of freedom to choose an appropriate template! LLMs are smart enough to learn to handle lots of different
-input formats. One popular choice is the `ChatML` format, and this is a good, flexible choice for many use-cases. 
-It looks like this:
-
-```
-{%- for message in messages %}
-    {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}
-{%- endfor %}
-```
-
-If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes
-handy support for [generation prompts](#what-are-generation-prompts), but note that it doesn't add BOS or EOS tokens!
-If your model expects those, they won't be added automatically by `apply_chat_template` - in other words, the
-text will be tokenized with `add_special_tokens=False`. This is to avoid potential conflicts between the template and
-the `add_special_tokens` logic. If your model expects special tokens, make sure to add them to the template!
-
-```python
-tokenizer.chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
-```
-
-This template wraps each message in `<|im_start|>` and `<|im_end|>` tokens, and simply writes the role as a string, which
-allows for flexibility in the roles you train with. The output looks like this:
-
-```text
-<|im_start|>system
-You are a helpful chatbot that will do its best not to say anything so stupid that people tweet about it.<|im_end|>
-<|im_start|>user
-How are you?<|im_end|>
-<|im_start|>assistant
-I'm doing great!<|im_end|>
-```
-
-The "user", "system" and "assistant" roles are the standard for chat, and we recommend using them when it makes sense,
-particularly if you want your model to operate well with [`TextGenerationPipeline`]. However, you are not limited
-to these roles - templating is extremely flexible, and any string can be a role.
-
-## I want to add some chat templates! How should I get started?
-
-If you have any chat models, you should set their `tokenizer.chat_template` attribute and test it using
-[`~PreTrainedTokenizer.apply_chat_template`], then push the updated tokenizer to the Hub. This applies even if you're
-not the model owner - if you're using a model with an empty chat template, or one that's still using the default class
-template, please open a [pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) to the model repository so that this attribute can be set properly!
-
-Once the attribute is set, that's it, you're done! `tokenizer.apply_chat_template` will now work correctly for that
-model, which means it is also automatically supported in places like `TextGenerationPipeline`!
-
-By ensuring that models have this attribute, we can make sure that the whole community gets to use the full power of
-open-source models. Formatting mismatches have been haunting the field and silently harming performance for too long - 
-it's time to put an end to them!
-
-
-<Tip>
-
-The easiest way to get started with writing Jinja templates is to take a look at some existing ones. You can use
-`print(tokenizer.chat_template)` for any chat model to see what template it's using. In general, models that support tool use have 
-much more complex templates than other models - so when you're just getting started, they're probably a bad example
-to learn from! You can also take a look at the 
-[Jinja documentation](https://jinja.palletsprojects.com/en/3.1.x/templates/#synopsis) for details
-of general Jinja formatting and syntax.
-
-</Tip>
-
-Jinja templates in `transformers` are identical to Jinja templates elsewhere. The main thing to know is that 
-the conversation history will be accessible inside your template as a variable called `messages`.  
-You will be able to access `messages` in your template just like you can in Python, which means you can loop over 
-it with `{% for message in messages %}` or access individual messages with `{{ messages[0] }}`, for example.
-
-You can also use the following tips to write clean, efficient Jinja templates:
-
-### Trimming whitespace
-
-By default, Jinja will print any whitespace that comes before or after a block. This can be a problem for chat
-templates, which generally want to be very precise with whitespace! To avoid this, we strongly recommend writing
-your templates like this:
-
-```
-{%- for message in messages %}
-    {{- message['role'] + message['content'] }}
-{%- endfor %}
-```
-
-rather than like this:
-
-```
-{% for message in messages %}
-    {{ message['role'] + message['content'] }}
-{% endfor %}
-```
-
-Adding `-` will strip any whitespace that comes before the block. The second example looks innocent, but the newline
-and indentation may end up being included in the output, which is probably not what you want!
-
-### Special variables
-
-Inside your template, you will have access several special variables. The most important of these is `messages`, 
-which contains the chat history as a list of message dicts. However, there are several others. Not every
-variable will be used in every template. The most common other variables are:
-
- `tools` contains a list of tools in JSON schema format. Will be `None` or undefined if no tools are passed.
- `documents` contains a list of documents in the format `{"title": "Title", "contents": "Contents"}`, used for retrieval-augmented generation. Will be `None` or undefined if no documents are passed.
- `add_generation_prompt` is a bool that is `True` if the user has requested a generation prompt, and `False` otherwise. If this is set, your template should add the header for an assistant message to the end of the conversation. If your model doesn't have a specific header for assistant messages, you can ignore this flag.
- **Special tokens** like `bos_token` and `eos_token`. These are extracted from `tokenizer.special_tokens_map`. The exact tokens available inside each template will differ depending on the parent tokenizer.
-
-<Tip>
-
-You can actually pass any `kwarg` to `apply_chat_template`, and it will be accessible inside the template as a variable. In general,
-we recommend trying to stick to the core variables above, as it will make your model harder to use if users have
-to write custom code to pass model-specific `kwargs`. However, we're aware that this field moves quickly, so if you
-have a new use-case that doesn't fit in the core API, feel free to use a new `kwarg` for it! If a new `kwarg`
-becomes common we may promote it into the core API and create a standard, documented format for it.
-
-</Tip>
-
-### Callable functions
-
-There is also a short list of callable functions available to you inside your templates. These are:
-
- `raise_exception(msg)`: Raises a `TemplateException`. This is useful for debugging, and for telling users when they're
-doing something that your template doesn't support.
- `strftime_now(format_str)`: Equivalent to `datetime.now().strftime(format_str)` in Python. This is used for getting
-the current date/time in a specific format, which is sometimes included in system messages.
-
-### Compatibility with non-Python Jinja
-
-There are multiple implementations of Jinja in various languages. They generally have the same syntax,
-but a key difference is that when you're writing a template in Python you can use Python methods, such as
-`.lower()` on strings or `.items()` on dicts. This will break if someone tries to use your template on a non-Python
-implementation of Jinja. Non-Python implementations are particularly common in deployment environments, where JS
-and Rust are very popular. 
-
-Don't panic, though! There are a few easy changes you can make to your templates to ensure they're compatible across
-all implementations of Jinja:
-
- Replace Python methods with Jinja filters. These usually have the same name, for example `string.lower()` becomes
-  `string|lower`, and `dict.items()` becomes `dict|items`. One notable change is that `string.strip()` becomes `string|trim`.
-  See the [list of built-in filters](https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters)
-  in the Jinja documentation for more.
- Replace `True`, `False` and `None`, which are Python-specific, with `true`, `false` and `none`.
- Directly rendering a dict or list may give different results in other implementations (for example, string entries
-  might change from single-quoted to double-quoted). Adding the `tojson` filter can help to ensure consistency here.
-
-### Writing generation prompts
-
-We mentioned above that `add_generation_prompt` is a special variable that will be accessible inside your template,
-and is controlled by the user setting the `add_generation_prompt` flag. If your model expects a header for
-assistant messages, then your template must support adding the header when `add_generation_prompt` is set.
-
-Here is an example of a template that formats messages ChatML-style, with generation prompt support:
-
-```text
-{{- bos_token }}
-{%- for message in messages %}
-    {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}
-{%- endfor %}
-{%- if add_generation_prompt %}
-    {{- '<|im_start|>assistant\n' }}
-{%- endif %}
-```
-
-The exact content of the assistant header will depend on your specific model, but it should always be **the string
-that represents the start of an assistant message**, so that if the user applies your template with 
-`add_generation_prompt=True` and then generates text, the model will write an assistant response. Also note that some
-models do not need a generation prompt, because assistant messages always begin immediately after user messages. 
-This is particularly common for LLaMA and Mistral models, where assistant messages begin immediately after the `[/INST]`
-token that ends user messages. In these cases, the template can ignore the `add_generation_prompt` flag.
-
-Generation prompts are important! If your model requires a generation prompt but it is not set in the template, then
-model generations will likely be severely degraded, or the model may display unusual behaviour like continuing 
-the final user message! 
-
-### Writing and debugging larger templates
-
-When this feature was introduced, most templates were quite small, the Jinja equivalent of a "one-liner" script. 
-However, with new models and features like tool-use and RAG, some templates can be 100 lines long or more. When
-writing templates like these, it's a good idea to write them in a separate file, using a text editor. You can easily 
-extract a chat template to a file:
-
-```python
-open("template.jinja", "w").write(tokenizer.chat_template)
-```
-
-Or load the edited template back into the tokenizer:
-
-```python
-tokenizer.chat_template = open("template.jinja").read()
-```
-
-As an added bonus, when you write a long, multi-line template in a separate file, line numbers in that file will
-exactly correspond to line numbers in template parsing or execution errors. This will make it much easier to
-identify the source of issues.
-
-
-
-## Writing templates for tools
-
-Although chat templates do not enforce a specific API for tools (or for anything, really), we recommend 
-template authors try to stick to a standard API where possible. The whole point of chat templates is to allow code
-to be transferable across models, so deviating from the standard tools API means users will have to write
-custom code to use tools with your model. Sometimes it's unavoidable, but often with clever templating you can
-make the standard API work!
-
-Below, we'll list the elements of the standard API, and give tips on writing templates that will work well with it.
-
-### Tool definitions
-
-Your template should expect that the variable `tools` will either be null (if no tools are passed), or is a list 
-of JSON schema dicts. Our chat template methods allow users to pass tools as either JSON schema or Python functions, but when
-functions are passed, we automatically generate JSON schema and pass that to your template. As a result, the 
-`tools` variable that your template receives will always be a list of JSON schema. Here is
-a sample tool JSON schema:
-
-```json
-{
-  "type": "function", 
-  "function": {
-    "name": "multiply", 
-    "description": "A function that multiplies two numbers", 
-    "parameters": {
-      "type": "object", 
-      "properties": {
-        "a": {
-          "type": "number", 
-          "description": "The first number to multiply"
-        }, 
-        "b": {
-          "type": "number",
-          "description": "The second number to multiply"
-        }
-      }, 
-      "required": ["a", "b"]
-    }
-  }
-}
-```
-
-And here is some example code for handling tools in your chat template. Remember, this is just an example for a
-specific format - your model will probably need different formatting!
-
-```text
-{%- if tools %}
-    {%- for tool in tools %}
-        {{- '<tool>' + tool['function']['name'] + '\n' }}
-        {%- for argument in tool['function']['parameters']['properties'] %}
-            {{- argument + ': ' + tool['function']['parameters']['properties'][argument]['description'] + '\n' }}
-        {%- endfor %}
-        {{- '\n</tool>' }}
-    {%- endif %}
-{%- endif %}
-```
-
-The specific tokens and tool descriptions your template renders should of course be chosen to match the ones your model
-was trained with. There is no requirement that your **model** understands JSON schema input, only that your template can translate
-JSON schema into your model's format. For example, [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024) 
-was trained with tools defined using Python function headers, but the Command-R tool template accepts JSON schema, 
-converts types internally and renders the input tools as Python headers. You can do a lot with templates!
-
-### Tool calls
-
-Tool calls, if present, will be a list attached to a message with the "assistant" role. Note that `tool_calls` is 
-always a list, even though most tool-calling models only support single tool calls at a time, which means
-the list will usually only have a single element. Here is a sample message dict containing a tool call:
-
-```json
-{
-  "role": "assistant",
-  "tool_calls": [
-    {
-      "type": "function",
-      "function": {
-        "name": "multiply",
-        "arguments": {
-          "a": 5,
-          "b": 6
-        }
-      }
-    }
-  ]
-}
-```
-
-And a common pattern for handling them would be something like this:
-
-```text
-{%- if message['role'] == 'assistant' and 'tool_calls' in message %}
-    {%- for tool_call in message['tool_calls'] %}
-            {{- '<tool_call>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments']|tojson + '\n</tool_call>' }}
-        {%- endif %}
-    {%- endfor %}
-{%- endif %}
-```
-
-Again, you should render the tool call with the formatting and special tokens that your model expects.
-
-### Tool responses
-
-Tool responses have a simple format: They are a message dict with the "tool" role, a "name" key giving the name
-of the called function, and a "content" key containing the result of the tool call. Here is a sample tool response:
-
-```json
-{
-  "role": "tool",
-  "name": "multiply",
-  "content": "30"
-}
-```
-
-You don't need to use all of the keys in the tool response. For example, if your model doesn't expect the function
-name to be included in the tool response, then rendering it can be as simple as:
-
-```text
-{%- if message['role'] == 'tool' %}
-    {{- "<tool_result>" + message['content'] + "</tool_result>" }}
-{%- endif %}
-```
-
-Again, remember that the actual formatting and special tokens are model-specific - you should take a lot of care
-to ensure that tokens, whitespace and everything else exactly match the format your model was trained with!
--- a/docs/source/en/chat_template_basics.md
+++ b/docs/source/en/chat_template_basics.md
@ -1,287 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Getting Started with Chat Templates for Text LLMs
-
-An increasingly common use case for LLMs is **chat**. In a chat context, rather than continuing a single string
-of text (as is the case with a standard language model), the model instead continues a conversation that consists
-of one or more **messages**, each of which includes a **role**, like "user" or "assistant", as well as message text.
-
-Much like tokenization, different models expect very different input formats for chat. This is the reason we added
-**chat templates** as a feature. Chat templates are part of the tokenizer for text-only LLMs or processor for multimodal LLMs. They specify how to convert conversations, represented as lists of messages, into a single tokenizable string in the format that the model expects. 
-
-We'll explore the basic usage of chat templates with text-only LLMs in this page. For detailed guidance on multimodal models, we have a dedicated [documentation oage for multimodal models](./chat_template_multimodal), which covers how to work with image, video and audio inputs in your templates.
-
-Let's make this concrete with a quick example using the `mistralai/Mistral-7B-Instruct-v0.1` model:
-
-```python
->>> from transformers import AutoTokenizer
->>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
-
->>> chat = [
-...   {"role": "user", "content": "Hello, how are you?"},
-...   {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
-...   {"role": "user", "content": "I'd like to show off how chat templating works!"},
-... ]
-
->>> tokenizer.apply_chat_template(chat, tokenize=False)
-"<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
-```
-
-Notice how the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of 
-user messages (but not assistant messages!), and the entire chat is condensed into a single string. 
-If we use `tokenize=True`, which is the default setting, that string will also be tokenized for us.
-
-Now, try the same code, but swap in the `HuggingFaceH4/zephyr-7b-beta` model instead, and you should get:
-
-```text
-<|user|>
-Hello, how are you?</s>
-<|assistant|>
-I'm doing great. How can I help you today?</s>
-<|user|>
-I'd like to show off how chat templating works!</s>
-```
-
-Both Zephyr and Mistral-Instruct were fine-tuned from the same base model, `Mistral-7B-v0.1`. However, they were trained
-with totally different chat formats. Without chat templates, you would have to write manual formatting code for each
-model, and it's very easy to make minor errors that hurt performance! Chat templates handle the details of formatting 
-for you, allowing you to write universal code that works for any model.
-
-
-## How do I use chat templates?
-
-As you can see in the example above, chat templates are easy to use. Simply build a list of messages, with `role`
-and `content` keys, and then pass it to the [`~PreTrainedTokenizer.apply_chat_template`] or [`~ProcessorMixin.apply_chat_template`] method
-depending on what type of model you are using. Once you do that,
-you'll get output that's ready to go! When using chat templates as input for model generation, it's also a good idea
-to use `add_generation_prompt=True` to add a [generation prompt](#what-are-generation-prompts). 
-
-Here's an example of preparing input for `model.generate()`, using `Zephyr` again:
-
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-checkpoint = "HuggingFaceH4/zephyr-7b-beta"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint)  # You may want to use bfloat16 and/or move to GPU here
-
-messages = [
-    {
-        "role": "system",
-        "content": "You are a friendly chatbot who always responds in the style of a pirate",
-    },
-    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
- ]
-tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
-print(tokenizer.decode(tokenized_chat[0]))
-```
-This will yield a string in the input format that Zephyr expects. 
-```text
-<|system|>
-You are a friendly chatbot who always responds in the style of a pirate</s> 
-<|user|>
-How many helicopters can a human eat in one sitting?</s> 
-<|assistant|>
-```
-
-Now that our input is formatted correctly for Zephyr, we can use the model to generate a response to the user's question:
-
-```python
-outputs = model.generate(tokenized_chat, max_new_tokens=128) 
-print(tokenizer.decode(outputs[0]))
-```
-
-This will yield:
-
-```text
-<|system|>
-You are a friendly chatbot who always responds in the style of a pirate</s> 
-<|user|>
-How many helicopters can a human eat in one sitting?</s> 
-<|assistant|>
-Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all.
-```
-
-Arr, 'twas easy after all!
-
-
-## Is there an automated pipeline for chat?
-
-Yes, there is! Our text generation pipelines support chat inputs, which makes it easy to use chat models. In the past,
-we used to use a dedicated "ConversationalPipeline" class, but this has now been deprecated and its functionality
-has been merged into the [`TextGenerationPipeline`]. Let's try the `Zephyr` example again, but this time using 
-a pipeline:
-
-```python
-from transformers import pipeline
-
-pipe = pipeline("text-generation", "HuggingFaceH4/zephyr-7b-beta")
-messages = [
-    {
-        "role": "system",
-        "content": "You are a friendly chatbot who always responds in the style of a pirate",
-    },
-    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
-]
-print(pipe(messages, max_new_tokens=128)[0]['generated_text'][-1])  # Print the assistant's response
-```
-
-```text
-{'role': 'assistant', 'content': "Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all."}
-```
-
-The pipeline will take care of all the details of tokenization and calling `apply_chat_template` for you -
-once the model has a chat template, all you need to do is initialize the pipeline and pass it the list of messages!
-
-
-## What are "generation prompts"?
-
-You may have noticed that the `apply_chat_template` method has an `add_generation_prompt` argument. This argument tells
-the template to add tokens that indicate the start of a bot response. For example, consider the following chat:
-
-```python
-messages = [
-    {"role": "user", "content": "Hi there!"},
-    {"role": "assistant", "content": "Nice to meet you!"},
-    {"role": "user", "content": "Can I ask a question?"}
-]
-```
-
-Here's what this will look like without a generation prompt, for a model that uses standard "ChatML" formatting:
-
-```python
-tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
-"""<|im_start|>user
-Hi there!<|im_end|>
-<|im_start|>assistant
-Nice to meet you!<|im_end|>
-<|im_start|>user
-Can I ask a question?<|im_end|>
-"""
-```
-
-And here's what it looks like **with** a generation prompt:
-
-```python
-tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-"""<|im_start|>user
-Hi there!<|im_end|>
-<|im_start|>assistant
-Nice to meet you!<|im_end|>
-<|im_start|>user
-Can I ask a question?<|im_end|>
-<|im_start|>assistant
-"""
-```
-
-Note that this time, we've added the tokens that indicate the start of a bot response. This ensures that when the model
-generates text it will write a bot response instead of doing something unexpected, like continuing the user's 
-message. Remember, chat models are still just language models - they're trained to continue text, and chat is just a 
-special kind of text to them! You need to guide them with appropriate control tokens, so they know what they're 
-supposed to be doing.
-
-Not all models require generation prompts. Some models, like LLaMA, don't have any
-special tokens before bot responses. In these cases, the `add_generation_prompt` argument will have no effect. The exact
-effect that `add_generation_prompt` has will depend on the template being used.
-
-
-## What does "continue_final_message" do?
-
-When passing a list of messages to `apply_chat_template` or `TextGenerationPipeline`, you can choose
-to format the chat so the model will continue the final message in the chat instead of starting a new one. This is done
-by removing any end-of-sequence tokens that indicate the end of the final message, so that the model will simply
-extend the final message when it begins to generate text. This is useful for "prefilling" the model's response. 
-
-Here's an example:
-
-```python
-chat = [
-    {"role": "user", "content": "Can you format the answer in JSON?"},
-    {"role": "assistant", "content": '{"name": "'},
-]
-
-formatted_chat = tokenizer.apply_chat_template(chat, tokenize=True, return_dict=True, continue_final_message=True)
-model.generate(**formatted_chat)
-```
-
-The model will generate text that continues the JSON string, rather than starting a new message. This approach
-can be very useful for improving the accuracy of the model's instruction-following when you know how you want
-it to start its replies.
-
-Because `add_generation_prompt` adds the tokens that start a new message, and `continue_final_message` removes any
-end-of-message tokens from the final message, it does not make sense to use them together. As a result, you'll
-get an error if you try!
-
-<Tip>
-
-The default behaviour of `TextGenerationPipeline` is to set `add_generation_prompt=True` so that it starts a new
-message. However, if the final message in the input chat has the "assistant" role, it will assume that this message is 
-a prefill and switch to `continue_final_message=True` instead, because most models do not support multiple 
-consecutive assistant messages. You can override this behaviour by explicitly passing the `continue_final_message` 
-argument when calling the pipeline.
-
-</Tip>
-
-
-## Can I use chat templates in training?
-
-Yes! This is a good way to ensure that the chat template matches the tokens the model sees during training.
-We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you
-can simply continue like any other language model training task. When training, you should usually set 
-`add_generation_prompt=False`, because the added tokens to prompt an assistant response will not be helpful during 
-training. Let's see an example:
-
-```python
-from transformers import AutoTokenizer
-from datasets import Dataset
-
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
-
-chat1 = [
-    {"role": "user", "content": "Which is bigger, the moon or the sun?"},
-    {"role": "assistant", "content": "The sun."}
-]
-chat2 = [
-    {"role": "user", "content": "Which is bigger, a virus or a bacterium?"},
-    {"role": "assistant", "content": "A bacterium."}
-]
-
-dataset = Dataset.from_dict({"chat": [chat1, chat2]})
-dataset = dataset.map(lambda x: {"formatted_chat": tokenizer.apply_chat_template(x["chat"], tokenize=False, add_generation_prompt=False)})
-print(dataset['formatted_chat'][0])
-```
-And we get:
-```text
-<|user|>
-Which is bigger, the moon or the sun?</s>
-<|assistant|>
-The sun.</s>
-```
-
-From here, just continue training like you would with a standard language modelling task, using the `formatted_chat` column.
-
-<Tip>
-
-By default, some tokenizers add special tokens like `<bos>` and `<eos>` to text they tokenize. Chat templates should 
-already include all the special tokens they need, and so additional special tokens will often be incorrect or 
-duplicated, which will hurt model performance.
-
-Therefore, if you format text with `apply_chat_template(tokenize=False)`, you should set the argument
-`add_special_tokens=False` when you tokenize that text later. If you use `apply_chat_template(tokenize=True)`, you don't need to worry about this!
-
-</Tip>
-
--- a/docs/source/en/chat_template_multimodal.md
+++ b/docs/source/en/chat_template_multimodal.md
@ -1,289 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Multimodal Chat Templates for Vision and Audio LLMs
-
-In this section, we'll explore how to use chat templates with multimodal models, enabling your templates to handle a variety of inputs such as text, images, and audio. Multimodal models provide richer, more interactive experiences, and understanding how to effectively combine these inputs within your templates is key. We’ll walk through how to work with different modalities, configure your templates for optimal performance, and tackle common challenges along the way.
-
-Just like with text-only LLMs, multimodal models expect a chat with **messages**, each of which includes a **role** and **content**. However, for multimodal models, chat templates are a part of the [Processor](./main_cllasses/processors) class. Let's see how we can format our prompts when there are images or videos in the input along with text.
-
-
-## Image inputs
-
-For models such as [LLaVA](https://huggingface.co/llava-hf) the prompts can be formatted as below. Notice that the only difference from text-only models is that we need to also pass a placeholder for input images. To accommodate for extra modalities, each **content** is a list containing either a text or an image **type**.
-
-Let's make this concrete with a quick example using the `llava-hf/llava-onevision-qwen2-0.5b-ov-hf` model:
-
-```python
-from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
-
-model_id = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-processor = AutoProcessor.from_pretrained(model_id)
-
-messages = [
-    {
-        "role": "system",
-        "content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
-    },
-    {
-      "role": "user",
-      "content": [
-            {"type": "image"},
-            {"type": "text", "text": "What are these?"},
-        ],
-    },
-]
-
-formatted_prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
-print(formatted_prompt)
-```
-
-This yields a string in LLaVA's expected input format with many `<image>` tokens prepended before the text.
-```text
-'<|im_start|>system 
-<|im_start|>system 
-You are a friendly chatbot who always responds in the style of a pirate<|im_end|><|im_start|>user <image>
-What are these?<|im_end|>
-```
-
-
-### Image paths or URLs
-
-To incorporate images into your chat templates, you can pass them as file paths or URLs. This method automatically loads the image, processes it, and prepares the necessary pixel values to create ready-to-use inputs for the model. This approach simplifies the integration of images, enabling seamless multimodal functionality.
-
-Let's see how it works with an example using the same model as above. This time we'll indicate an image URL with `"url"` key in the message's **content** and ask the chat template to `tokenize` and `return_dict`. Currently, "base64", "url", and "path" are supported image sources.
-
-```python
-from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
-
-model_id = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-model = LlavaOnevisionForConditionalGeneration.from_pretrained(model_id)
-processor = AutoProcessor.from_pretrained(model_id)
-
-messages = [
-    {
-        "role": "system",
-        "content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
-    },
-    {
-      "role": "user",
-      "content": [
-            {"type": "image", "url": "http://images.cocodataset.org/val2017/000000039769.jpg"},
-            {"type": "text", "text": "What are these?"},
-        ],
-    },
-]
-
-processed_chat = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt")
-print(processed_chat.keys())
-```
-
-This yields a dictionary with inputs processed and ready to be further passed into [`~GenerationMixin.generate`] to generate text.
-```text
-dict_keys(["input_ids", "attention_mask", "pixel_values", "image_sizes"])
-```
-
-
-## Video inputs
-
-Some vision models support videos as inputs as well as images. The message format is very similar to the image-only models with tiny differences to handle loading videos from a URL. We can continue using the same model as before since it supports videos.
-
-### Sampling with fixed number of frames
-
-Here's an example of how to set up a conversation with video inputs. Notice the extra `kwargs` passed to `processor.apply_chat_template()`. The key parameter here is `num_frames`, which controls how many frames to sample uniformly from the video. Each model checkpoint has a maximum frame count it was trained with, and exceeding this limit can significantly impact generation quality. So, it’s important to choose a frame count that fits both the model's capacity and your computational resources. If you don't specify `num_frames`, the entire video will be loaded without any frame sampling.
-
-You also have the option to choose a specific framework to load the video, depending on your preferences or needs. Currently, we support `decord`, `pyav` (the default), `opencv`, and `torchvision`. For this example, we’ll use `decord`, as it's a bit faster than `pyav`.
-
-
-<Tip>
-
-Note that if you are trying to load a video from URL, you can decode the video only with `pyav` or `decord` as backend.
-
-</Tip>
-
-
-```python
-from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
-
-model_id = "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
-model = LlavaOnevisionForConditionalGeneration.from_pretrained(model_id)
-processor = AutoProcessor.from_pretrained(model_id)
-
-messages = [
-    {
-        "role": "system",
-        "content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
-    },
-    {
-      "role": "user",
-      "content": [
-            {"type": "video", "url": "https://test-videos.co.uk/vids/bigbuckbunny/mp4/h264/720/Big_Buck_Bunny_720_10s_10MB.mp4"},
-            {"type": "text", "text": "What do you see in this video?"},
-        ],
-    },
-]
-
-processed_chat = processor.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_dict=True,
-    return_tensors="pt",
-    num_frames=32,
-    video_load_backend="decord",
-)
-print(processed_chat.keys())
-```
-
-### Sampling with FPS
-
-When working with long videos, you might want to sample more frames for better representation. Instead of a fixed number of frames, you can specify `video_fps`, which determines how many frames per second to extract. For example, if a video is **10 seconds long** and you set `video_fps=2`, the model will sample **20 frames** (2 per second, uniformly spaced). 
-
-Using the above model, we need to apply chat template as follows to sample 2 frames per second.
-
-```python
-processed_chat = processor.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_dict=True,
-    video_fps=32,
-    video_load_backend="decord",
-)
-print(processed_chat.keys())
-```
-
-
-### Custom Frame Sampling with a Function  
-
-Not all models sample frames **uniformly** — some require more complex logic to determine which frames to use. If your model follows a different sampling strategy, you can **customize** frame selection by providing a function:  
-
-🔹 Use the `sample_indices_fn` argument to pass a **callable function** for sampling.  
-🔹 If provided, this function **overrides** standard `num_frames` and `fps` methods.  
-🔹 It receives all the arguments passed to `load_video` and must return **valid frame indices** to sample.  
-
-You should use `sample_indices_fn` when:
-
- If you need a custom sampling strategy (e.g., **adaptive frame selection** instead of uniform sampling).  
- If your model prioritizes **key moments** in a video rather than evenly spaced frames.  
-
-Here’s an example of how to implement it:  
-
-
-```python
-
-def sample_indices_fn(metadata, **kwargs):
-    # samples only the first and the second frame
-    return [0, 1]
-
-processed_chat = processor.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_dict=True,
-    sample_indices_fn=sample_indices_fn,
-    video_load_backend="decord",
-)
-print(processed_chat.keys())
-```
-
-By using `sample_indices_fn`, you gain **full control** over frame selection, making your model **more adaptable** to different video scenarios. 🚀  
-
-
-### List of image frames as video
-
-Sometimes, instead of having a full video file, you might only have a set of sampled frames stored as images.
-
-You can pass a list of image file paths, and the processor will automatically concatenate them into a video. Just make sure that all images have the same size, as they are assumed to be from the same video.
-
-
-```python
-frames_paths = ["/path/to/frame0.png", "/path/to/frame5.png", "/path/to/frame10.png"]
-messages = [
-    {
-        "role": "system",
-        "content": [{"type": "text", "text": "You are a friendly chatbot who always responds in the style of a pirate"}],
-    },
-    {
-      "role": "user",
-      "content": [
-            {"type": "video", "path": frames_paths},
-            {"type": "text", "text": "What do you see in this video?"},
-        ],
-    },
-]
-
-processed_chat = processor.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_dict=True,
-)
-print(processed_chat.keys())
-```
-
-
-## Multimodal conversational pipeline
-
-[`ImageTextToTextPipeline`] currently accepts images as inputs but we are planning to add support for video inputs in the future. The pipeline supports chat inputs in the same format as we have seen above. Apart from that, the pipeline will accept chats in OpenAI format. This format is supported exclusively within the pipeline to make inference easier and more accessible. 
-
-Here is how the OpenAI conversation format looks:
-
-```python
-messages = [
-    {
-        "role": "user",
-        "content": [
-            {
-                "type": "text",
-                "text": "What is in this image?",
-            },
-            {
-                "type": "image_url",
-                "image_url": {"url": f"http://images.cocodataset.org/val2017/000000039769.jpg"},
-            },
-        ],
-    }
-]
-```
-
-## Best Practices for Multimodal Template Configuration
-
-
-To add a custom chat template for your multimodal LLM, simply create your template using [Jinja](https://jinja.palletsprojects.com/en/3.1.x/templates/) and set it with `processor.chat_template`. If you're new to writing chat templates or need some tips, check out our [tutorial here](./chat_template_advanced) for helpful guidance.
-
-In some cases, you may want your template to handle a **list of content** from multiple modalities, while still supporting a plain string for text-only inference. Here's an example of how you can achieve that, using the [Llama-Vision](https://huggingface.co/collections/meta-llama/metas-llama-32-multimodal-models-675bfd70e574a62dd0e4059b) chat template.
-
-
-```
-{% for message in messages %}
-{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}
-{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
-{% if message['content'] is string %}
-{{ message['content'] }}
-{% else %}
-{% for content in message['content'] %}
-{% if content['type'] == 'image' %}
-{{ '<|image|>' }}
-{% elif content['type'] == 'text' %}
-{{ content['text'] }}
-{% endif %}
-{% endfor %}
-{% endif %}
-{{ '<|eot_id|>' }}
-{% endfor %}
-{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}
-```
--- a/docs/source/en/chat_template_tools_and_documents.md
+++ b/docs/source/en/chat_template_tools_and_documents.md
@ -1,410 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-
-# Expanding Chat Templates with Tools and Documents
-
-The only argument that `apply_chat_template` requires is `messages`. However, you can pass any keyword
-argument to `apply_chat_template` and it will be accessible inside the template. This gives you a lot of freedom to use
-chat templates for many things. There are no restrictions on the names or the format of these arguments - you can pass
-strings, lists, dicts or whatever else you want. 
-
-That said, there are some common use-cases for these extra arguments,
-such as passing tools for function calling, or documents for retrieval-augmented generation. In these common cases,
-we have some opinionated recommendations about what the names and formats of these arguments should be, which are
-described in the sections below. We encourage model authors to make their chat templates compatible with this format,
-to make it easy to transfer tool-calling code between models.
-
-## Tool use / function calling
-
-"Tool use" LLMs can choose to call functions as external tools before generating an answer. When passing tools
-to a tool-use model, you can simply pass a list of functions to the `tools` argument:
-
-```python
-import datetime
-
-def current_time():
-    """Get the current local time as a string."""
-    return str(datetime.now())
-
-def multiply(a: float, b: float):
-    """
-    A function that multiplies two numbers
-    
-    Args:
-        a: The first number to multiply
-        b: The second number to multiply
-    """
-    return a * b
-
-tools = [current_time, multiply]
-
-model_input = tokenizer.apply_chat_template(
-    messages,
-    tools=tools
-)
-```
-
-In order for this to work correctly, you should write your functions in the format above, so that they can be parsed
-correctly as tools. Specifically, you should follow these rules:
-
- The function should have a descriptive name
- Every argument must have a type hint
- The function must have a docstring in the standard Google style (in other words, an initial function description  
-  followed by an `Args:` block that describes the arguments, unless the function does not have any arguments.) 
- Do not include types in the `Args:` block. In other words, write `a: The first number to multiply`, not
-  `a (int): The first number to multiply`. Type hints should go in the function header instead.
- The function can have a return type and a `Returns:` block in the docstring. However, these are optional
-  because most tool-use models ignore them.
-
-### Passing tool results to the model
-
-The sample code above is enough to list the available tools for your model, but what happens if it wants to actually use
-one? If that happens, you should:
-
-1. Parse the model's output to get the tool name(s) and arguments.
-2. Add the model's tool call(s) to the conversation.
-3. Call the corresponding function(s) with those arguments.
-4. Add the result(s) to the conversation
-
-### A complete tool use example
-
-Let's walk through a tool use example, step by step. For this example, we will use an 8B `Hermes-2-Pro` model,
-as it is one of the highest-performing tool-use models in its size category at the time of writing. If you have the
-memory, you can consider using a larger model instead like [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
-or [Mixtral-8x22B](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1), both of which also support tool use
-and offer even stronger performance.
-
-First, let's load our model and tokenizer:
-
-```python
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-checkpoint = "NousResearch/Hermes-2-Pro-Llama-3-8B"
-
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto")
-```
-
-Next, let's define a list of tools:
-
-```python
-def get_current_temperature(location: str, unit: str) -> float:
-    """
-    Get the current temperature at a location.
-    
-    Args:
-        location: The location to get the temperature for, in the format "City, Country"
-        unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"])
-    Returns:
-        The current temperature at the specified location in the specified units, as a float.
-    """
-    return 22.  # A real function should probably actually get the temperature!
-
-def get_current_wind_speed(location: str) -> float:
-    """
-    Get the current wind speed in km/h at a given location.
-    
-    Args:
-        location: The location to get the temperature for, in the format "City, Country"
-    Returns:
-        The current wind speed at the given location in km/h, as a float.
-    """
-    return 6.  # A real function should probably actually get the wind speed!
-
-tools = [get_current_temperature, get_current_wind_speed]
-```
-
-Now, let's set up a conversation for our bot:
-
-```python
-messages = [
-  {"role": "system", "content": "You are a bot that responds to weather queries. You should reply with the unit used in the queried location."},
-  {"role": "user", "content": "Hey, what's the temperature in Paris right now?"}
-]
-```
-
-Now, let's apply the chat template and generate a response:
-
-```python
-inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
-inputs = {k: v.to(model.device) for k, v in inputs.items()}
-out = model.generate(**inputs, max_new_tokens=128)
-print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):]))
-```
-
-And we get:
-
-```text
-<tool_call>
-{"arguments": {"location": "Paris, France", "unit": "celsius"}, "name": "get_current_temperature"}
-</tool_call><|im_end|>
-```
-
-The model has called the function with valid arguments, in the format requested by the function docstring. It has
-inferred that we're most likely referring to the Paris in France, and it remembered that, as the home of SI units,
-the temperature in France should certainly be displayed in Celsius.
-
-<Tip>
-
-The output format above is specific to the `Hermes-2-Pro` model we're using in this example. Other models may emit different
-tool call formats, and you may need to do some manual parsing at this step. For example, `Llama-3.1` models will emit
-slightly different JSON, with `parameters` instead of `arguments`. Regardless of the format the model outputs, you 
-should add the tool call to the conversation in the format below, with `tool_calls`, `function` and `arguments` keys. 
-
-</Tip>
-
-Next, let's append the model's tool call to the conversation.
-
-```python
-tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}}
-messages.append({"role": "assistant", "tool_calls": [{"type": "function", "function": tool_call}]})
-```
-
-<Tip warning={true}>
-
-If you're familiar with the OpenAI API, you should pay attention to an important difference here - the `tool_call` is
-a dict, but in the OpenAI API it's a JSON string. Passing a string may cause errors or strange model behaviour!
-
-</Tip>
-
-Now that we've added the tool call to the conversation, we can call the function and append the result to the
-conversation. Since we're just using a dummy function for this example that always returns 22.0, we can just append 
-that result directly.
-
-```python
-messages.append({"role": "tool", "name": "get_current_temperature", "content": "22.0"})
-```
-
-<Tip>
-
-Some model architectures, notably Mistral/Mixtral, also require a `tool_call_id` here, which should be
-9 randomly-generated alphanumeric characters, and assigned to the `id` key of the tool call
-dictionary. The same key should also be assigned to the `tool_call_id` key of the tool response dictionary below, so 
-that tool calls can be matched to tool responses. So, for Mistral/Mixtral models, the code above would be:
-
-```python
-tool_call_id = "9Ae3bDc2F"  # Random ID, 9 alphanumeric characters
-tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}}
-messages.append({"role": "assistant", "tool_calls": [{"type": "function", "id": tool_call_id, "function": tool_call}]})
-```
-
-and
-
-```python
-messages.append({"role": "tool", "tool_call_id": tool_call_id, "name": "get_current_temperature", "content": "22.0"})
-```
-
-</Tip>
-
-Finally, let's let the assistant read the function outputs and continue chatting with the user:
-
-```python
-inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt")
-inputs = {k: v.to(model.device) for k, v in inputs.items()}
-out = model.generate(**inputs, max_new_tokens=128)
-print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):]))
-```
-
-And we get:
-
-```text
-The current temperature in Paris, France is 22.0 ° Celsius.<|im_end|>
-```
-
-Although this was a simple demo with dummy tools and a single call, the same technique works with 
-multiple real tools and longer conversations. This can be a powerful way to extend the capabilities of conversational
-agents with real-time information, computational tools like calculators, or access to large databases.
-
-### Understanding tool schemas
-
-Each function you pass to the `tools` argument of `apply_chat_template` is converted into a 
-[JSON schema](https://json-schema.org/learn/getting-started-step-by-step). These schemas
-are then passed to the model chat template. In other words, tool-use models do not see your functions directly, and they
-never see the actual code inside them. What they care about is the function **definitions** and the **arguments** they
-need to pass to them - they care about what the tools do and how to use them, not how they work! It is up to you
-to read their outputs, detect if they have requested to use a tool, pass their arguments to the tool function, and
-return the response in the chat.
-
-Generating JSON schemas to pass to the template should be automatic and invisible as long as your functions
-follow the specification above, but if you encounter problems, or you simply want more control over the conversion, 
-you can handle the conversion manually. Here is an example of a manual schema conversion.
-
-```python
-from transformers.utils import get_json_schema
-
-def multiply(a: float, b: float):
-    """
-    A function that multiplies two numbers
-    
-    Args:
-        a: The first number to multiply
-        b: The second number to multiply
-    """
-    return a * b
-
-schema = get_json_schema(multiply)
-print(schema)
-```
-
-This will yield:
-
-```json
-{
-  "type": "function", 
-  "function": {
-    "name": "multiply", 
-    "description": "A function that multiplies two numbers", 
-    "parameters": {
-      "type": "object", 
-      "properties": {
-        "a": {
-          "type": "number", 
-          "description": "The first number to multiply"
-        }, 
-        "b": {
-          "type": "number",
-          "description": "The second number to multiply"
-        }
-      }, 
-      "required": ["a", "b"]
-    }
-  }
-}
-```
-
-If you wish, you can edit these schemas, or even write them from scratch yourself without using `get_json_schema` at 
-all. JSON schemas can be passed directly to the `tools` argument of 
-`apply_chat_template` - this gives you a lot of power to define precise schemas for more complex functions. Be careful,
-though - the more complex your schemas, the more likely the model is to get confused when dealing with them! We 
-recommend simple function signatures where possible, keeping arguments (and especially complex, nested arguments) 
-to a minimum.
-
-Here is an example of defining schemas by hand, and passing them directly to `apply_chat_template`:
-
-```python
-# A simple function that takes no arguments
-current_time = {
-  "type": "function", 
-  "function": {
-    "name": "current_time",
-    "description": "Get the current local time as a string.",
-    "parameters": {
-      'type': 'object',
-      'properties': {}
-    }
-  }
-}
-
-# A more complete function that takes two numerical arguments
-multiply = {
-  'type': 'function',
-  'function': {
-    'name': 'multiply',
-    'description': 'A function that multiplies two numbers', 
-    'parameters': {
-      'type': 'object', 
-      'properties': {
-        'a': {
-          'type': 'number',
-          'description': 'The first number to multiply'
-        }, 
-        'b': {
-          'type': 'number', 'description': 'The second number to multiply'
-        }
-      }, 
-      'required': ['a', 'b']
-    }
-  }
-}
-
-model_input = tokenizer.apply_chat_template(
-    messages,
-    tools = [current_time, multiply]
-)
-```
-
-## Retrieval-augmented generation
-
-"Retrieval-augmented generation" or "RAG" LLMs can search a corpus of documents for information before responding
-to a query. This allows models to vastly expand their knowledge base beyond their limited context size. Our 
-recommendation for RAG models is that their template
-should accept a `documents` argument. This should be a list of documents, where each "document"
-is a single dict with `title` and `contents` keys, both of which are strings. Because this format is much simpler
-than the JSON schemas used for tools, no helper functions are necessary.
-
-Here's an example of a RAG template in action:
-
-```python
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-# Load the model and tokenizer
-model_id = "CohereForAI/c4ai-command-r-v01-4bit"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
-device = model.device # Get the device the model is loaded on
-
-# Define conversation input
-conversation = [
-    {"role": "user", "content": "What has Man always dreamed of?"}
-]
-
-# Define documents for retrieval-based generation
-documents = [
-    {
-        "title": "The Moon: Our Age-Old Foe", 
-        "text": "Man has always dreamed of destroying the moon. In this essay, I shall..."
-    },
-    {
-        "title": "The Sun: Our Age-Old Friend",
-        "text": "Although often underappreciated, the sun provides several notable benefits..."
-    }
-]
-
-# Tokenize conversation and documents using a RAG template, returning PyTorch tensors.
-input_ids = tokenizer.apply_chat_template(
-    conversation=conversation,
-    documents=documents,
-    chat_template="rag",
-    tokenize=True,
-    add_generation_prompt=True,
-    return_tensors="pt").to(device)
-
-# Generate a response 
-gen_tokens = model.generate(
-    input_ids,
-    max_new_tokens=100,
-    do_sample=True,
-    temperature=0.3,
-    )
-
-# Decode and print the generated text along with generation prompt
-gen_text = tokenizer.decode(gen_tokens[0])
-print(gen_text)
-```
-
-<Tip>
-
-The `documents` input for retrieval-augmented generation is not widely supported, and many models have chat templates which simply ignore this input.
-
-To verify if a model supports the `documents` input, you can read its model card, or `print(tokenizer.chat_template)` to see if the `documents` key is used anywhere.
-
-One model class that does support it, though, is Cohere's [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-08-2024) and [Command-R+](https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024), through their `rag` chat template. You can see additional examples of grounded generation using this feature in their model cards.
-
-</Tip>
-
-
--- a/docs/source/en/chat_templating.md
+++ b/docs/source/en/chat_templating.md
--- a/docs/source/en/debugging.md
+++ b/docs/source/en/debugging.md
@ -30,7 +30,7 @@ DeepSpeed compiles CUDA C++ code and it can be a potential source of errors when

 <Tip>

-For any other installation issues, please [open an issue](https://github.com/deepspeedai/DeepSpeed/issues) with the DeepSpeed team.
+For any other installation issues, please [open an issue](https://github.com/microsoft/DeepSpeed/issues) with the DeepSpeed team.

 </Tip>

@ -89,7 +89,7 @@ sudo ln -s /usr/bin/g++-7  /usr/local/cuda-10.2/bin/g++
 If you're still having issues with installing DeepSpeed or if you're building DeepSpeed at run time, you can try to prebuild the DeepSpeed modules before installing them. To make a local build for DeepSpeed:

 ```bash
-git clone https://github.com/deepspeedai/DeepSpeed/
+git clone https://github.com/microsoft/DeepSpeed/
 cd DeepSpeed
 rm -rf build
 TORCH_CUDA_ARCH_LIST="8.6" DS_BUILD_CPU_ADAM=1 DS_BUILD_UTILS=1 pip install . \
@ -141,7 +141,7 @@ It is also possible to not specify `TORCH_CUDA_ARCH_LIST` and the build program
 For training on multiple machines with the same setup, you'll need to make a binary wheel:

 ```bash
-git clone https://github.com/deepspeedai/DeepSpeed/
+git clone https://github.com/microsoft/DeepSpeed/
 cd DeepSpeed
 rm -rf build
 TORCH_CUDA_ARCH_LIST="8.6" DS_BUILD_CPU_ADAM=1 DS_BUILD_UTILS=1 \
--- a/docs/source/en/deepspeed.md
+++ b/docs/source/en/deepspeed.md
@ -28,7 +28,7 @@ This guide will walk you through how to deploy DeepSpeed training, the features

 ## Installation

-DeepSpeed is available to install from PyPI or Transformers (for more detailed installation options, take a look at the DeepSpeed [installation details](https://www.deepspeed.ai/tutorials/advanced-install/) or the GitHub [README](https://github.com/deepspeedai/DeepSpeed#installation)).
+DeepSpeed is available to install from PyPI or Transformers (for more detailed installation options, take a look at the DeepSpeed [installation details](https://www.deepspeed.ai/tutorials/advanced-install/) or the GitHub [README](https://github.com/microsoft/deepspeed#installation)).

 <Tip>

@ -114,10 +114,10 @@ DeepSpeed works with the [`Trainer`] class by way of a config file containing al

 <Tip>

-Find a complete list of DeepSpeed configuration options on the [DeepSpeed Configuration JSON](https://www.deepspeed.ai/docs/config-json/) reference. You can also find more practical examples of various DeepSpeed configuration examples on the [DeepSpeedExamples](https://github.com/deepspeedai/DeepSpeedExamples) repository or the main [DeepSpeed](https://github.com/deepspeedai/DeepSpeed) repository. To quickly find specific examples, you can:
+Find a complete list of DeepSpeed configuration options on the [DeepSpeed Configuration JSON](https://www.deepspeed.ai/docs/config-json/) reference. You can also find more practical examples of various DeepSpeed configuration examples on the [DeepSpeedExamples](https://github.com/microsoft/DeepSpeedExamples) repository or the main [DeepSpeed](https://github.com/microsoft/DeepSpeed) repository. To quickly find specific examples, you can:

 ```bash
-git clone https://github.com/deepspeedai/DeepSpeedExamples
+git clone https://github.com/microsoft/DeepSpeedExamples
 cd DeepSpeedExamples
 find . -name '*json'
 # find examples with the Lamb optimizer
@ -303,7 +303,7 @@ For more information about initializing large models with ZeRO-3 and accessing t

 [ZeRO-Infinity](https://hf.co/papers/2104.07857) allows offloading model states to the CPU and/or NVMe to save even more memory. Smart partitioning and tiling algorithms allow each GPU to send and receive very small amounts of data during offloading such that a modern NVMe can fit an even larger total memory pool than is available to your training process. ZeRO-Infinity requires ZeRO-3.

-Depending on the CPU and/or NVMe memory available, you can offload both the [optimizer states](https://www.deepspeed.ai/docs/config-json/#optimizer-offloading) and [parameters](https://www.deepspeed.ai/docs/config-json/#parameter-offloading), just one of them, or none. You should also make sure the `nvme_path` is pointing to an NVMe device, because while it still works with a normal hard drive or solid state drive, it'll be significantly slower. With a modern NVMe, you can expect peak transfer speeds of ~3.5GB/s for read and ~3GB/s for write operations. Lastly, [run a benchmark](https://github.com/deepspeedai/DeepSpeed/issues/998) on your training setup to determine the optimal `aio` configuration.
+Depending on the CPU and/or NVMe memory available, you can offload both the [optimizer states](https://www.deepspeed.ai/docs/config-json/#optimizer-offloading) and [parameters](https://www.deepspeed.ai/docs/config-json/#parameter-offloading), just one of them, or none. You should also make sure the `nvme_path` is pointing to an NVMe device, because while it still works with a normal hard drive or solid state drive, it'll be significantly slower. With a modern NVMe, you can expect peak transfer speeds of ~3.5GB/s for read and ~3GB/s for write operations. Lastly, [run a benchmark](https://github.com/microsoft/DeepSpeed/issues/998) on your training setup to determine the optimal `aio` configuration.

 The example ZeRO-3/Infinity configuration file below sets most of the parameter values to `auto`, but you could also manually add these values.

@ -586,20 +586,6 @@ You can choose the communication data type by setting the `communication_data_ty
 }
 ```

-### Universal Checkpointing
-
-[Universal Checkpointing](https://www.deepspeed.ai/tutorials/universal-checkpointing) is an efficient and flexible feature for saving and loading model checkpoints. It enables seamless model training continuation and fine-tuning across different model architectures, parallelism techniques, and training configurations.
-
-Resume training with a universal checkpoint by setting [load_universal](https://www.deepspeed.ai/docs/config-json/#checkpoint-options) to `true` in the config file.
-
-```yaml
-{
-    "checkpoint": {
-        "load_universal": true
-    }
-}
-```
-
 ## Deployment

 DeepSpeed can be deployed by different launchers such as [torchrun](https://pytorch.org/docs/stable/elastic/run.html), the `deepspeed` launcher, or [Accelerate](https://huggingface.co/docs/accelerate/basic_tutorials/launch#using-accelerate-launch). To deploy, add `--deepspeed ds_config.json` to the [`Trainer`] command line. It’s recommended to use DeepSpeed’s [`add_config_arguments`](https://deepspeed.readthedocs.io/en/latest/initialize.html#argument-parsing) utility to add any necessary command line arguments to your code.
@ -1157,7 +1143,7 @@ For Transformers>=4.28, if `synced_gpus` is automatically set to `True` if multi

 ## Troubleshoot

-When you encounter an issue, you should consider whether DeepSpeed is the cause of the problem because often it isn't (unless it's super obviously and you can see DeepSpeed modules in the exception)! The first step should be to retry your setup without DeepSpeed, and if the problem persists, then you can report the issue. If the issue is a core DeepSpeed problem and unrelated to the Transformers integration, open an Issue on the [DeepSpeed repository](https://github.com/deepspeedai/DeepSpeed).
+When you encounter an issue, you should consider whether DeepSpeed is the cause of the problem because often it isn't (unless it's super obviously and you can see DeepSpeed modules in the exception)! The first step should be to retry your setup without DeepSpeed, and if the problem persists, then you can report the issue. If the issue is a core DeepSpeed problem and unrelated to the Transformers integration, open an Issue on the [DeepSpeed repository](https://github.com/microsoft/DeepSpeed).

 For issues related to the Transformers integration, please provide the following information:

@ -1227,7 +1213,7 @@ This means the DeepSpeed loss scaler is unable to find a scaling coefficient to

 ## Resources

-DeepSpeed ZeRO is a powerful technology for training and loading very large models for inference with limited GPU resources, making it more accessible to everyone. To learn more about DeepSpeed, feel free to read the [blog posts](https://www.microsoft.com/en-us/research/search/?q=deepspeed), [documentation](https://www.deepspeed.ai/getting-started/), and [GitHub repository](https://github.com/deepspeedai/DeepSpeed). 
+DeepSpeed ZeRO is a powerful technology for training and loading very large models for inference with limited GPU resources, making it more accessible to everyone. To learn more about DeepSpeed, feel free to read the [blog posts](https://www.microsoft.com/en-us/research/search/?q=deepspeed), [documentation](https://www.deepspeed.ai/getting-started/), and [GitHub repository](https://github.com/microsoft/deepspeed). 

 The following papers are also a great resource for learning more about ZeRO:

--- a/docs/source/en/fsdp.md
+++ b/docs/source/en/fsdp.md
@ -58,7 +58,7 @@ Otherwise, you can choose a size-based wrapping policy where FSDP is applied to

 ### Checkpointing

-Intermediate checkpoints should be saved with `fsdp_state_dict_type: SHARDED_STATE_DICT` because saving the full state dict with CPU offloading on rank 0 takes a lot of time and often results in `NCCL Timeout` errors due to indefinite hanging during broadcasting. You can resume training with the sharded state dicts with the [`~accelerate.Accelerator.load_state`] method.
+Intermediate checkpoints should be saved with `fsdp_state_dict_type: SHARDED_STATE_DICT` because saving the full state dict with CPU offloading on rank 0 takes a lot of time and often results in `NCCL Timeout` errors due to indefinite hanging during broadcasting. You can resume training with the sharded state dicts with the [`~accelerate.Accelerator.load_state`]` method.

 ```py
 # directory containing checkpoints
--- a/docs/source/en/generation_strategies.md
+++ b/docs/source/en/generation_strategies.md
@ -41,13 +41,6 @@ This guide describes:
 * common decoding strategies and their main parameters
 * saving and sharing custom generation configurations with your fine-tuned model on 🤗 Hub

-<Tip>
-
-`generate()` is a critical component of our [chat CLI](quicktour#chat-with-text-generation-models).
-You can apply the learnings of this guide there as well.
-
-</Tip>
-
 ## Default text generation configuration

 A decoding strategy for a model is defined in its generation configuration. When using pre-trained models for inference
@ -103,12 +96,6 @@ distribution over the entire vocabulary with various strategy-specific adjustmen
 the decoding strategies that support multiple sequence candidates, e.g. variations of beam search and sampling. Decoding
 strategies like greedy search and contrastive search return a single output sequence.

-It is also possible to extend `generate()` with external libraries or handcrafted code. The `logits_processor` argument
-allows you to pass custom [`LogitsProcessor`] instances, allowing you to manipulate the next token probability
-distributions. Likewise, the `stopping_criteria` argument lets you set custom [`StoppingCriteria`] to stop text generation.
-The [`logits-processor-zoo`](https://github.com/NVIDIA/logits-processor-zoo) library contains examples of external
-`generate()`-compatible extensions.
-
 ## Save a custom decoding strategy with your model

 If you would like to share your fine-tuned model with a specific generation configuration, you can:
@ -231,7 +218,7 @@ to check if the text is machine-generated (outputs `True` for machine-generated
 >>> detector = WatermarkDetector(model_config=model.config, device="cpu", watermarking_config=watermarking_config)
 >>> detection_out = detector(out, return_dict=True)
 >>> detection_out.prediction
-array([ True,  True])
+array([True, True])
 ```


@ -269,7 +256,7 @@ dimension you can act upon, in addition to selecting a decoding strategy. Popula
 >>> model = AutoModelForCausalLM.from_pretrained(checkpoint)
 >>> outputs = model.generate(**inputs)
 >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['I look forward to seeing you all again!\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n']
+['I look forward to seeing you all again!\n\n\n\n\n\n\n\n\n\n\n']
 ```

 ### Contrastive search
@ -445,31 +432,9 @@ To enable assisted decoding, set the `assistant_model` argument with a model.
 >>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
 >>> outputs = model.generate(**inputs, assistant_model=assistant_model)
 >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a glass of wine.']
+['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
 ```

-<Tip>
-
-If you're using a `pipeline` object, all you need to do is to pass the assistant checkpoint under `assistant_model`
-
-```python
->>> from transformers import pipeline
->>> import torch
-
->>> pipe = pipeline(
-...     "text-generation",
-...     model="meta-llama/Llama-3.1-8B",
-...     assistant_model="meta-llama/Llama-3.2-1B",  # This extra line is all that's needed, also works with UAD
-...     torch_dtype=torch.bfloat16
-... )
->>> pipe_output = pipe("Once upon a time, ", max_new_tokens=50, do_sample=False)
->>> pipe_output[0]["generated_text"]
-'Once upon a time, 3D printing was a niche technology that was only'
-```
-
-</Tip>
-
-
 When using assisted decoding with sampling methods, you can use the `temperature` argument to control the randomness,
 just like in multinomial sampling. However, in assisted decoding, reducing the temperature may help improve the latency.

@ -488,7 +453,7 @@ just like in multinomial sampling. However, in assisted decoding, reducing the t
 >>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
 >>> outputs = model.generate(**inputs, assistant_model=assistant_model, do_sample=True, temperature=0.5)
 >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are two people who are very different, but they are both very good at what they do. Alice']
+['Alice and Bob, a couple of friends of mine, who are both in the same office as']
 ```

 We recommend to install `scikit-learn` library to enhance the candidate generation strategy and achieve additional speedup.
@ -518,7 +483,7 @@ to ensure the new tokens include the correct prompt suffix.
 >>> assistant_model = AutoModelForCausalLM.from_pretrained(assistant_checkpoint)
 >>> outputs = model.generate(**inputs, assistant_model=assistant_model, tokenizer=tokenizer, assistant_tokenizer=assistant_tokenizer)
 >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are playing a game. Alice has a set of $n$ integers $a_1, a']
+['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
 ```

 #### Prompt Lookup
@ -547,7 +512,7 @@ If the model you're using was trained to do early exit, you can pass
 >>> model = AutoModelForCausalLM.from_pretrained(checkpoint)
 >>> outputs = model.generate(**inputs, assistant_early_exit=4, do_sample=False, max_new_tokens=20)
 >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
-['Alice and Bob are playing a game. Alice has a set of $n$ integers $a_1, a']
+['Alice and Bob are sitting in a bar. Alice is drinking a beer and Bob is drinking a']
 ```

 ### DoLa Decoding
@ -571,9 +536,10 @@ See the following examples for DoLa decoding with the 32-layer LLaMA-7B model.
 >>> import torch
 >>> from accelerate.test_utils.testing import get_backend

->>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 >>> tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
->>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16).to(device)
+>>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16)
+>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
+>>> model.to(device)
 >>> set_seed(42)

 >>> text = "On what date was the Declaration of Independence officially signed?"
@ -592,7 +558,7 @@ See the following examples for DoLa decoding with the 32-layer LLaMA-7B model.
 # DoLa decoding with contrasting specific layers (layers 28 and 30)
 >>> dola_custom_output = model.generate(**inputs, do_sample=False, max_new_tokens=50, dola_layers=[28,30], repetition_penalty=1.2)
 >>> tokenizer.batch_decode(dola_custom_output[:, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-['\nIn 1891, when he was 54 years old, John Jacob Astor founded his empire. He opened a one-man business and spent the next 27 years working 10-hour days. When']
+['\nIt was officially signed on 2 August 1776, when 56 members of the Second Continental Congress, representing the original 13 American colonies, voted unanimously for the resolution for independence. The 2']
 ```

 #### Understanding the `dola_layers` argument
--- a/docs/source/en/gguf.md
+++ b/docs/source/en/gguf.md
@ -88,7 +88,6 @@ For now the supported model architectures are the architectures that have been v
 - T5
 - Mamba
 - Nemotron
- Gemma2

 ## Example usage

--- a/docs/source/en/how_to_hack_models.md
+++ b/docs/source/en/how_to_hack_models.md
@ -24,37 +24,7 @@ You'll learn how to:
 - Modify a model's architecture by changing its attention mechanism.
 - Apply techniques like Low-Rank Adaptation (LoRA) to specific model components.

-We encourage you to contribute your own hacks and share them here with the community!
-
-## Efficient Development Workflow
-
-When modifying model code, you'll often need to test your changes without restarting your Python session. The `clear_import_cache()` utility helps with this workflow, especially during model development and contribution when you need to frequently test and compare model outputs:
-
-```python
-from transformers import AutoModel
-model = AutoModel.from_pretrained("bert-base-uncased")
-
-# Make modifications to the transformers code...
-
-# Clear the cache to reload the modified code
-from transformers.utils.import_utils import clear_import_cache
-clear_import_cache()
-
-# Reimport to get the changes
-from transformers import AutoModel
-model = AutoModel.from_pretrained("bert-base-uncased")  # Will use updated code
-```
-
-This is particularly useful when:
- Iteratively modifying model architectures
- Debugging model implementations 
- Testing changes during model development
- Comparing outputs between original and modified versions
- Working on model contributions
-
-The `clear_import_cache()` function removes all cached Transformers modules and allows Python to reload the modified code. This enables rapid development cycles without constantly restarting your environment.
-
-This workflow is especially valuable when implementing new models, where you need to frequently compare outputs between the original implementation and your Transformers version (as described in the [Add New Model](https://huggingface.co/docs/transformers/add_new_model) guide).
+We encourage you to contribute your own hacks and share them here with the community1

 ## Example: Modifying the Attention Mechanism in the Segment Anything Model (SAM)

--- a/docs/source/en/index.md
+++ b/docs/source/en/index.md
@ -62,11 +62,8 @@ Flax), PyTorch, and/or TensorFlow.
 |                        [ALBERT](model_doc/albert)                        |       ✅        |         ✅         |      ✅      |
 |                         [ALIGN](model_doc/align)                         |       ✅        |         ❌         |      ❌      |
 |                       [AltCLIP](model_doc/altclip)                       |       ✅        |         ❌         |      ❌      |
-|                          [Aria](model_doc/aria)                          |       ✅        |         ❌         |      ❌      |
-|                     [AriaText](model_doc/aria_text)                      |       ✅        |         ❌         |      ❌      |
 | [Audio Spectrogram Transformer](model_doc/audio-spectrogram-transformer) |       ✅        |         ❌         |      ❌      |
 |                    [Autoformer](model_doc/autoformer)                    |       ✅        |         ❌         |      ❌      |
-|                         [Bamba](model_doc/bamba)                         |       ✅        |         ❌         |      ❌      |
 |                          [Bark](model_doc/bark)                          |       ✅        |         ❌         |      ❌      |
 |                          [BART](model_doc/bart)                          |       ✅        |         ✅         |      ✅      |
 |                       [BARThez](model_doc/barthez)                       |       ✅        |         ✅         |      ✅      |
@ -100,8 +97,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                       [CodeGen](model_doc/codegen)                       |       ✅        |         ❌         |      ❌      |
 |                    [CodeLlama](model_doc/code_llama)                     |       ✅        |         ❌         |      ✅      |
 |                        [Cohere](model_doc/cohere)                        |       ✅        |         ❌         |      ❌      |
-|                       [Cohere2](model_doc/cohere2)                       |       ✅        |         ❌         |      ❌      |
-|                       [ColPali](model_doc/colpali)                       |       ✅        |         ❌         |      ❌      |
 |              [Conditional DETR](model_doc/conditional_detr)              |       ✅        |         ❌         |      ❌      |
 |                      [ConvBERT](model_doc/convbert)                      |       ✅        |         ✅         |      ❌      |
 |                      [ConvNeXT](model_doc/convnext)                      |       ✅        |         ✅         |      ❌      |
@ -110,7 +105,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                       [CPM-Ant](model_doc/cpmant)                        |       ✅        |         ❌         |      ❌      |
 |                          [CTRL](model_doc/ctrl)                          |       ✅        |         ✅         |      ❌      |
 |                           [CvT](model_doc/cvt)                           |       ✅        |         ✅         |      ❌      |
-|                      [DAB-DETR](model_doc/dab-detr)                      |       ✅        |         ❌         |      ❌      |
 |                           [DAC](model_doc/dac)                           |       ✅        |         ❌         |      ❌      |
 |                   [Data2VecAudio](model_doc/data2vec)                    |       ✅        |         ❌         |      ❌      |
 |                    [Data2VecText](model_doc/data2vec)                    |       ✅        |         ❌         |      ❌      |
@ -123,14 +117,11 @@ Flax), PyTorch, and/or TensorFlow.
 |                          [DeiT](model_doc/deit)                          |       ✅        |         ✅         |      ❌      |
 |                        [DePlot](model_doc/deplot)                        |       ✅        |         ❌         |      ❌      |
 |                [Depth Anything](model_doc/depth_anything)                |       ✅        |         ❌         |      ❌      |
-|                     [DepthPro](model_doc/depth_pro)                      |       ✅        |         ❌         |      ❌      |
 |                          [DETA](model_doc/deta)                          |       ✅        |         ❌         |      ❌      |
 |                          [DETR](model_doc/detr)                          |       ✅        |         ❌         |      ❌      |
 |                      [DialoGPT](model_doc/dialogpt)                      |       ✅        |         ✅         |      ✅      |
-|                     [DiffLlama](model_doc/diffllama)                     |       ✅        |         ❌         |      ❌      |
 |                         [DiNAT](model_doc/dinat)                         |       ✅        |         ❌         |      ❌      |
 |                        [DINOv2](model_doc/dinov2)                        |       ✅        |         ❌         |      ✅      |
-|         [DINOv2 with Registers](model_doc/dinov2_with_registers)         |       ✅        |         ❌         |      ❌      |
 |                    [DistilBERT](model_doc/distilbert)                    |       ✅        |         ✅         |      ✅      |
 |                           [DiT](model_doc/dit)                           |       ✅        |         ❌         |      ✅      |
 |                       [DonutSwin](model_doc/donut)                       |       ✅        |         ❌         |      ❌      |
@ -139,7 +130,6 @@ Flax), PyTorch, and/or TensorFlow.
 |               [EfficientFormer](model_doc/efficientformer)               |       ✅        |         ✅         |      ❌      |
 |                  [EfficientNet](model_doc/efficientnet)                  |       ✅        |         ❌         |      ❌      |
 |                       [ELECTRA](model_doc/electra)                       |       ✅        |         ✅         |      ✅      |
-|                          [Emu3](model_doc/emu3)                          |       ✅        |         ❌         |      ❌      |
 |                       [EnCodec](model_doc/encodec)                       |       ✅        |         ❌         |      ❌      |
 |               [Encoder decoder](model_doc/encoder-decoder)               |       ✅        |         ✅         |      ✅      |
 |                         [ERNIE](model_doc/ernie)                         |       ✅        |         ❌         |      ❌      |
@ -147,7 +137,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                           [ESM](model_doc/esm)                           |       ✅        |         ✅         |      ❌      |
 |              [FairSeq Machine-Translation](model_doc/fsmt)               |       ✅        |         ❌         |      ❌      |
 |                        [Falcon](model_doc/falcon)                        |       ✅        |         ❌         |      ❌      |
-|                       [Falcon3](model_doc/falcon3)                       |       ✅        |         ❌         |      ✅      |
 |                  [FalconMamba](model_doc/falcon_mamba)                   |       ✅        |         ❌         |      ❌      |
 |         [FastSpeech2Conformer](model_doc/fastspeech2_conformer)          |       ✅        |         ❌         |      ❌      |
 |                       [FLAN-T5](model_doc/flan-t5)                       |       ✅        |         ✅         |      ✅      |
@ -163,7 +152,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                           [GIT](model_doc/git)                           |       ✅        |         ❌         |      ❌      |
 |                           [GLM](model_doc/glm)                           |       ✅        |         ❌         |      ❌      |
 |                          [GLPN](model_doc/glpn)                          |       ✅        |         ❌         |      ❌      |
-|                      [GOT-OCR2](model_doc/got_ocr2)                      |       ✅        |         ❌         |      ❌      |
 |                       [GPT Neo](model_doc/gpt_neo)                       |       ✅        |         ❌         |      ✅      |
 |                      [GPT NeoX](model_doc/gpt_neox)                      |       ✅        |         ❌         |      ❌      |
 |             [GPT NeoX Japanese](model_doc/gpt_neox_japanese)             |       ✅        |         ❌         |      ❌      |
@ -173,11 +161,9 @@ Flax), PyTorch, and/or TensorFlow.
 |               [GPTSAN-japanese](model_doc/gptsan-japanese)               |       ✅        |         ❌         |      ❌      |
 |                       [Granite](model_doc/granite)                       |       ✅        |         ❌         |      ❌      |
 |                  [GraniteMoeMoe](model_doc/granitemoe)                   |       ✅        |         ❌         |      ❌      |
-|            [GraniteMoeSharedMoe](model_doc/granitemoeshared)             |       ✅        |         ❌         |      ❌      |
 |                    [Graphormer](model_doc/graphormer)                    |       ✅        |         ❌         |      ❌      |
 |                [Grounding DINO](model_doc/grounding-dino)                |       ✅        |         ❌         |      ❌      |
 |                      [GroupViT](model_doc/groupvit)                      |       ✅        |         ✅         |      ❌      |
-|                        [Helium](model_doc/helium)                        |       ✅        |         ❌         |      ❌      |
 |                       [HerBERT](model_doc/herbert)                       |       ✅        |         ✅         |      ✅      |
 |                         [Hiera](model_doc/hiera)                         |       ✅        |         ❌         |      ❌      |
 |                        [Hubert](model_doc/hubert)                        |       ✅        |         ✅         |      ❌      |
@ -186,7 +172,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                       [IDEFICS](model_doc/idefics)                       |       ✅        |         ✅         |      ❌      |
 |                      [Idefics2](model_doc/idefics2)                      |       ✅        |         ❌         |      ❌      |
 |                      [Idefics3](model_doc/idefics3)                      |       ✅        |         ❌         |      ❌      |
-|          [Idefics3VisionTransformer](model_doc/idefics3_vision)          |       ❌        |         ❌         |      ❌      |
 |                      [ImageGPT](model_doc/imagegpt)                      |       ✅        |         ❌         |      ❌      |
 |                      [Informer](model_doc/informer)                      |       ✅        |         ❌         |      ❌      |
 |                  [InstructBLIP](model_doc/instructblip)                  |       ✅        |         ❌         |      ❌      |
@ -240,8 +225,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                  [MobileNetV2](model_doc/mobilenet_v2)                   |       ✅        |         ❌         |      ❌      |
 |                     [MobileViT](model_doc/mobilevit)                     |       ✅        |         ✅         |      ❌      |
 |                   [MobileViTV2](model_doc/mobilevitv2)                   |       ✅        |         ❌         |      ❌      |
-|                    [ModernBERT](model_doc/modernbert)                    |       ✅        |         ❌         |      ❌      |
-|                     [Moonshine](model_doc/moonshine)                     |       ✅        |         ❌         |      ❌      |
 |                         [Moshi](model_doc/moshi)                         |       ✅        |         ❌         |      ❌      |
 |                         [MPNet](model_doc/mpnet)                         |       ✅        |         ✅         |      ❌      |
 |                           [MPT](model_doc/mpt)                           |       ✅        |         ❌         |      ❌      |
@ -289,7 +272,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                        [PVTv2](model_doc/pvt_v2)                         |       ✅        |         ❌         |      ❌      |
 |                       [QDQBert](model_doc/qdqbert)                       |       ✅        |         ❌         |      ❌      |
 |                         [Qwen2](model_doc/qwen2)                         |       ✅        |         ❌         |      ❌      |
-|                    [Qwen2_5_VL](model_doc/qwen2_5_vl)                    |       ✅        |         ❌         |      ❌      |
 |                   [Qwen2Audio](model_doc/qwen2_audio)                    |       ✅        |         ❌         |      ❌      |
 |                     [Qwen2MoE](model_doc/qwen2_moe)                      |       ✅        |         ❌         |      ❌      |
 |                      [Qwen2VL](model_doc/qwen2_vl)                       |       ✅        |         ❌         |      ❌      |
@ -307,7 +289,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                      [RoFormer](model_doc/roformer)                      |       ✅        |         ✅         |      ✅      |
 |                       [RT-DETR](model_doc/rt_detr)                       |       ✅        |         ❌         |      ❌      |
 |                [RT-DETR-ResNet](model_doc/rt_detr_resnet)                |       ✅        |         ❌         |      ❌      |
-|                    [RT-DETRv2](model_doc/rt_detr_v2)                     |       ✅        |         ❌         |      ❌      |
 |                          [RWKV](model_doc/rwkv)                          |       ✅        |         ❌         |      ❌      |
 |                           [SAM](model_doc/sam)                           |       ✅        |         ✅         |      ❌      |
 |                  [SeamlessM4T](model_doc/seamless_m4t)                   |       ✅        |         ❌         |      ❌      |
@ -324,7 +305,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                   [SqueezeBERT](model_doc/squeezebert)                   |       ✅        |         ❌         |      ❌      |
 |                      [StableLm](model_doc/stablelm)                      |       ✅        |         ❌         |      ❌      |
 |                    [Starcoder2](model_doc/starcoder2)                    |       ✅        |         ❌         |      ❌      |
-|                     [SuperGlue](model_doc/superglue)                     |       ✅        |         ❌         |      ❌      |
 |                    [SuperPoint](model_doc/superpoint)                    |       ✅        |         ❌         |      ❌      |
 |                   [SwiftFormer](model_doc/swiftformer)                   |       ✅        |         ✅         |      ❌      |
 |                    [Swin Transformer](model_doc/swin)                    |       ✅        |         ✅         |      ❌      |
@ -336,10 +316,8 @@ Flax), PyTorch, and/or TensorFlow.
 |             [Table Transformer](model_doc/table-transformer)             |       ✅        |         ❌         |      ❌      |
 |                         [TAPAS](model_doc/tapas)                         |       ✅        |         ✅         |      ❌      |
 |                         [TAPEX](model_doc/tapex)                         |       ✅        |         ✅         |      ✅      |
-|                       [TextNet](model_doc/textnet)                       |       ✅        |         ❌         |      ❌      |
 |       [Time Series Transformer](model_doc/time_series_transformer)       |       ✅        |         ❌         |      ❌      |
 |                   [TimeSformer](model_doc/timesformer)                   |       ✅        |         ❌         |      ❌      |
-|                [TimmWrapperModel](model_doc/timm_wrapper)                |       ✅        |         ❌         |      ❌      |
 |        [Trajectory Transformer](model_doc/trajectory_transformer)        |       ✅        |         ❌         |      ❌      |
 |                  [Transformer-XL](model_doc/transfo-xl)                  |       ✅        |         ✅         |      ❌      |
 |                         [TrOCR](model_doc/trocr)                         |       ✅        |         ❌         |      ❌      |
@ -366,8 +344,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                       [ViTMAE](model_doc/vit_mae)                        |       ✅        |         ✅         |      ❌      |
 |                      [ViTMatte](model_doc/vitmatte)                      |       ✅        |         ❌         |      ❌      |
 |                       [ViTMSN](model_doc/vit_msn)                        |       ✅        |         ❌         |      ❌      |
-|                       [ViTPose](model_doc/vitpose)                       |       ✅        |         ❌         |      ❌      |
-|              [ViTPoseBackbone](model_doc/vitpose_backbone)               |       ✅        |         ❌         |      ❌      |
 |                          [VITS](model_doc/vits)                          |       ✅        |         ❌         |      ❌      |
 |                         [ViViT](model_doc/vivit)                         |       ✅        |         ❌         |      ❌      |
 |                      [Wav2Vec2](model_doc/wav2vec2)                      |       ✅        |         ✅         |      ✅      |
@ -390,7 +366,6 @@ Flax), PyTorch, and/or TensorFlow.
 |                         [YOLOS](model_doc/yolos)                         |       ✅        |         ❌         |      ❌      |
 |                          [YOSO](model_doc/yoso)                          |       ✅        |         ❌         |      ❌      |
 |                         [Zamba](model_doc/zamba)                         |       ✅        |         ❌         |      ❌      |
-|                        [Zamba2](model_doc/zamba2)                        |       ✅        |         ❌         |      ❌      |
 |                      [ZoeDepth](model_doc/zoedepth)                      |       ✅        |         ❌         |      ❌      |

 <!-- End table-->
--- a/docs/source/en/installation.md
+++ b/docs/source/en/installation.md
@ -32,40 +32,29 @@ Install 🤗 Transformers for whichever deep learning library you're working wit

 You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies.

-Create a virtual environment with [uv](https://docs.astral.sh/uv/) (refer to [Installation](https://docs.astral.sh/uv/getting-started/installation/) for installation instructions), a fast Rust-based Python package and project manager.
+Start by creating a virtual environment in your project directory:

 ```bash
-uv venv my-env
-source my-env/bin/activate
+python -m venv .env
 ```

-Now you're ready to install 🤗 Transformers with pip or uv.
-
-<hfoptions id="install">
-<hfoption id="uv">
+Activate the virtual environment. On Linux and MacOs:

 ```bash
-uv pip install transformers
+source .env/bin/activate
+```
+Activate Virtual environment on Windows
+
+```bash
+.env/Scripts/activate
 ```

-</hfoption>
-<hfoption id="pip">
+Now you're ready to install 🤗 Transformers with the following command:

 ```bash
 pip install transformers
 ```

-</hfoption>
-</hfoptions>
-
-For GPU acceleration, install the appropriate CUDA drivers for [PyTorch](https://pytorch.org/get-started/locally) and TensorFlow(https://www.tensorflow.org/install/pip).
-
-Run the command below to check if your system detects an NVIDIA GPU.
-
-```bash
-nvidia-smi
-```
-
 For CPU-support only, you can conveniently install 🤗 Transformers and a deep learning library in one line. For example, install 🤗 Transformers and PyTorch with:

 ```bash
@ -265,36 +254,3 @@ Once your file is downloaded and locally cached, specify it's local path to load
 See the [How to download files from the Hub](https://huggingface.co/docs/hub/how-to-downstream) section for more details on downloading files stored on the Hub.

 </Tip>
-
-## Troubleshooting
-
-See below for some of the more common installation issues and how to resolve them.
-
-### Unsupported Python version
-
-Ensure you are using Python 3.9 or later. Run the command below to check your Python version.
-
-```
-python --version
-```
-
-### Missing dependencies
-
-Install all required dependencies by running the following command. Ensure you’re in the project directory before executing the command.
-
-```
-pip install -r requirements.txt
-```
-
-### Windows-specific
-
-If you encounter issues on Windows, you may need to activate Developer Mode. Navigate to Windows Settings > For Developers > Developer Mode.
-
-Alternatively, create and activate a virtual environment as shown below.
-
-```
-python -m venv env
-.\env\Scripts\activate
-```
-
-
--- a/docs/source/en/internal/generation_utils.md
+++ b/docs/source/en/internal/generation_utils.md
@ -352,8 +352,6 @@ A [`Constraint`] can be used to force the generation to include specific tokens

 [[autodoc]] TextIteratorStreamer

-[[autodoc]] AsyncTextIteratorStreamer
-
 ## Caches

 [[autodoc]] Cache
--- a/docs/source/en/kv_cache.md
+++ b/docs/source/en/kv_cache.md
@ -56,16 +56,16 @@ More concretely, key-value cache acts as a memory bank for these generative mode
  >>> import torch
  >>> from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache

-  >>> model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-  >>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
+  >>> model_id = "meta-llama/Llama-2-7b-chat-hf"
+  >>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda:0")
  >>> tokenizer = AutoTokenizer.from_pretrained(model_id)

  >>> past_key_values = DynamicCache()
  >>> messages = [{"role": "user", "content": "Hello, what's your name."}]
-  >>> inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
+  >>> inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt", return_dict=True).to("cuda:0")

  >>> generated_ids = inputs.input_ids
-  >>> cache_position = torch.arange(inputs.input_ids.shape[1], dtype=torch.int64, device=model.device)
+  >>> cache_position = torch.arange(inputs.input_ids.shape[1], dtype=torch.int64, device="cuda:0")
  >>> max_new_tokens = 10

  >>> for _ in range(max_new_tokens):
@ -82,13 +82,7 @@ More concretely, key-value cache acts as a memory bank for these generative mode
  ...     cache_position = cache_position[-1:] + 1 # add one more position for the next token

  >>> print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
-  ```
-  ```txt
-  <|user|>
-  Hello, what's your name. 
-  <|assistant|>
-  My name is Sarah. 
-  <|
+  "[INST] Hello, what's your name. [/INST]  Hello! My name is LLaMA,"
  ```

 </details>
@ -138,13 +132,17 @@ Cache quantization can be detrimental in terms of latency if the context length
 >>> import torch
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM

->>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
->>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
+>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16).to("cuda:0")
 >>> inputs = tokenizer("I like rock music because", return_tensors="pt").to(model.device)

 >>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20, cache_implementation="quantized", cache_config={"nbits": 4, "backend": "quanto"})
 >>> print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
-I like rock music because it's a great way to express myself. I like the way it makes me feel, the
+I like rock music because it's loud and energetic. It's a great way to express myself and rel
+
+>>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20)
+>>> print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
+I like rock music because it's loud and energetic. I like to listen to it when I'm feeling
 ```

 ### Offloaded Cache
@ -168,7 +166,7 @@ Use `cache_implementation="offloaded_static"` for an offloaded static cache (see
 >>> ckpt = "microsoft/Phi-3-mini-4k-instruct"

 >>> tokenizer = AutoTokenizer.from_pretrained(ckpt)
->>> model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, device_map="auto")
+>>> model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to("cuda:0")
 >>> inputs = tokenizer("Fun fact: The shortest", return_tensors="pt").to(model.device)

 >>> out = model.generate(**inputs, do_sample=False, max_new_tokens=23, cache_implementation="offloaded")
@ -233,14 +231,14 @@ For more examples with Static Cache and JIT compilation, take a look at [StaticC
 >>> import torch
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM

->>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
->>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
+>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
 >>> inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)

 >>> # simply pass the cache implementation="static"
 >>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20, cache_implementation="static")
 >>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
-"Hello, my name is [Your Name] and I am a [Your Position] at [Your Company]. I am writing"
+"Hello, my name is [Your Name], and I am a [Your Profession] with [Number of Years] of"
 ```


@ -258,7 +256,7 @@ This will use the [`~OffloadedStaticCache`] implementation instead.
 >>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
 >>> inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)

->>> # simply pass the cache implementation="offloaded_static"
+>>> # simply pass the cache implementation="static"
 >>> out = model.generate(**inputs, do_sample=False, max_new_tokens=20, cache_implementation="offloaded_static")
 >>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
 "Hello, my name is [Your Name], and I am a [Your Profession] with [Number of Years] of"
@ -277,14 +275,14 @@ Note that you can use this cache only for models that support sliding window, e.
 >>> import torch
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM, SinkCache

->>> tokenizer = AutoTokenizer.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B")
->>> model = AutoModelForCausalLM.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B", torch_dtype=torch.float16, device_map="auto")
+>>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
+>>> model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16).to("cuda:0")
 >>> inputs = tokenizer("Yesterday I was on a rock concert and.", return_tensors="pt").to(model.device)

 >>> # can be used by passing in cache implementation
 >>> out = model.generate(**inputs, do_sample=False, max_new_tokens=30, cache_implementation="sliding_window")
 >>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
-"Yesterday I was on a rock concert and. I was so excited to see my favorite band perform live. I was so happy that I could hardly contain myself. I was jumping up and down and"
+"Yesterday I was on a rock concert and. I was so excited to see my favorite band. I was so excited that I was jumping up and down and screaming. I was so excited that I"
 ```

 ### Sink Cache
@ -297,8 +295,8 @@ Unlike other cache classes, this one can't be used directly by indicating a `cac
 >>> import torch
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM, SinkCache

->>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
->>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
+>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16).to("cuda:0")
 >>> inputs = tokenizer("This is a long story about unicorns, fairies and magic.", return_tensors="pt").to(model.device)

 >>> # get our cache, specify number of sink tokens and window size
@ -306,7 +304,7 @@ Unlike other cache classes, this one can't be used directly by indicating a `cac
 >>> past_key_values = SinkCache(window_length=256, num_sink_tokens=4)
 >>> out = model.generate(**inputs, do_sample=False, max_new_tokens=30, past_key_values=past_key_values)
 >>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
-"This is a long story about unicorns, fairies and magic. It is a story about a young girl named Lily who discovers that she has the power to control the elements. She learns that she can"
+"This is a long story about unicorns, fairies and magic. It is a fantasy world where unicorns and fairies live together in harmony. The story follows a young girl named Lily"
 ```

 ### Encoder-Decoder Cache
@ -334,22 +332,22 @@ In case you are using Sink Cache, you have to crop your inputs to that maximum l
 >>> import torch
 >>> from transformers import AutoTokenizer,AutoModelForCausalLM
 >>> from transformers.cache_utils import (
-...    DynamicCache,
-...    SinkCache,
-...    StaticCache,
-...    SlidingWindowCache,
-...    QuantoQuantizedCache,
-...    QuantizedCacheConfig,
-... )
+>>>     DynamicCache,
+>>>     SinkCache,
+>>>     StaticCache,
+>>>     SlidingWindowCache,
+>>>     QuantoQuantizedCache,
+>>>     QuantizedCacheConfig,
+>>> )

->>> model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+>>> model_id = "meta-llama/Llama-2-7b-chat-hf"
 >>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map='auto')
 >>> tokenizer = AutoTokenizer.from_pretrained(model_id)

 >>> user_prompts = ["Hello, what's your name?", "Btw, yesterday I was on a rock concert."]

 >>> past_key_values = DynamicCache()
->>> max_cache_length = past_key_values.get_max_cache_shape()
+>>> max_cache_length = past_key_values.get_max_length()

 >>> messages = []
 >>> for prompt in user_prompts:
@ -365,7 +363,7 @@ In case you are using Sink Cache, you have to crop your inputs to that maximum l
 ...     messages.append({"role": "assistant", "content": completion})

 print(messages)
-[{'role': 'user', 'content': "Hello, what's your name?"}, {'role': 'assistant', 'content': "Hello, I'm AI."}, {'role': 'user', 'content': 'Btw, yesterday I was on a rock concert.'}, {'role': 'assistant', 'content': "I'm sorry to hear that you were on a rock concert yesterday. It sounds like a fun experience, but I'm not capable of experiencing music or concerts. However, I can provide you with some information about rock music and its history. Rock music emerged in the 1950s and 1960s in the United States and Britain, and it quickly gained popularity around the world. Some of the most famous rock bands of all time include The Beatles, The Rolling Stones, Led Zeppelin, and Pink Floyd. Rock music has a distinct sound and style, with elements of blues, country, and folk music. It often features guitar solos, heavy bass lines, and drums. Rock music has had a significant impact on popular culture, influencing genres such as punk rock, heavy metal, and alternative rock."}]
+[{'role': 'user', 'content': "Hello, what's your name?"}, {'role': 'assistant', 'content': " Hello! My name is LLaMA, I'm a large language model trained by a team of researcher at Meta AI. 😊"}, {'role': 'user', 'content': 'Btw, yesterday I was on a rock concert.'}, {'role': 'assistant', 'content': ' Oh, cool! That sounds like a lot of fun! 🎉 Did you enjoy the concert? What was the band like? 🤔'}]
 ```


@ -377,19 +375,17 @@ Sometimes you would want to first fill-in cache object with key/values for certa
 >>> import copy
 >>> import torch
 >>> from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache, StaticCache
->>> from accelerate.test_utils.testing import get_backend

->>> DEVICE, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
->>> model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
->>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map=DEVICE)
+>>> model_id = "meta-llama/Llama-2-7b-chat-hf"
+>>> model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="cuda")
 >>> tokenizer = AutoTokenizer.from_pretrained(model_id)

 >>> # Init StaticCache with big enough max-length (1024 tokens for the below example)
 >>> # You can also init a DynamicCache, if that suits you better
->>> prompt_cache = StaticCache(config=model.config, max_batch_size=1, max_cache_len=1024, device=DEVICE, dtype=torch.bfloat16)
+>>> prompt_cache = StaticCache(config=model.config, max_batch_size=1, max_cache_len=1024, device="cuda", dtype=torch.bfloat16)

 >>> INITIAL_PROMPT = "You are a helpful assistant. "
->>> inputs_initial_prompt = tokenizer(INITIAL_PROMPT, return_tensors="pt").to(DEVICE)
+>>> inputs_initial_prompt = tokenizer(INITIAL_PROMPT, return_tensors="pt").to("cuda")
 >>> # This is the common prompt cached, we need to run forward without grad to be abel to copy
 >>> with torch.no_grad():
 ...      prompt_cache = model(**inputs_initial_prompt, past_key_values = prompt_cache).past_key_values
@ -397,14 +393,14 @@ Sometimes you would want to first fill-in cache object with key/values for certa
 >>> prompts = ["Help me to write a blogpost about travelling.", "What is the capital of France?"]
 >>> responses = []
 >>> for prompt in prompts:
-...     new_inputs = tokenizer(INITIAL_PROMPT + prompt, return_tensors="pt").to(DEVICE)
+...     new_inputs = tokenizer(INITIAL_PROMPT + prompt, return_tensors="pt").to("cuda")
 ...     past_key_values = copy.deepcopy(prompt_cache)
 ...     outputs = model.generate(**new_inputs, past_key_values=past_key_values,max_new_tokens=20)
 ...     response = tokenizer.batch_decode(outputs)[0]
 ...     responses.append(response)

 >>> print(responses)
-['<s> You are a helpful assistant. Help me to write a blogpost about travelling.  I am excited to share my experiences with you.  I have been traveling for the past', '<s> You are a helpful assistant. What is the capital of France? \n\nAnswer: Paris is the capital of France.</s>']
+['<s> You are a helpful assistant. Help me to write a blogpost about travelling.\n\nTitle: The Ultimate Guide to Travelling: Tips, Tricks, and', '<s> You are a helpful assistant. What is the capital of France?\n\nYes, the capital of France is Paris.</s>']
 ```


@ -418,8 +414,8 @@ this legacy format, you can seamlessly convert it to a `DynamicCache` and back.
 >>> import torch
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache

->>> tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
->>> model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
+>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
 >>> inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)

 >>> # `return_dict_in_generate=True` is required to return the cache. `return_legacy_cache` forces the returned cache
--- a/docs/source/en/llm_optims.md
+++ b/docs/source/en/llm_optims.md
@ -156,11 +156,9 @@ def decode_one_tokens(model, cur_token, input_pos, cache_position, past_key_valu
 There are a few important things you must do to enable static kv-cache and `torch.compile` with the `StaticCache` method:
 1. Initialize the [`StaticCache`] instance before using the model for inference. There you can configure parameters like the maximum batch size and sequence length.
 2. Call `torch.compile` on the model to compile the forward pass with the static kv-cache.
-3. Use `SDPBackend.MATH` in the [torch.nn.attention.sdpa_kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html) context manager to enable the native PyTorch C++ implementation of scaled dot product attention to speed up inference even more.
+3. Set `enable_math=True` in the [torch.backends.cuda.sdp_kernel](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html) context manager to enable the native PyTorch C++ implementation of scaled dot product attention to speed up inference even more.

 ```py
-from torch.nn.attention import SDPBackend, sdpa_kernel
-
 batch_size, seq_length = inputs["input_ids"].shape
 with torch.no_grad():
    past_key_values = StaticCache(
@ -181,7 +179,7 @@ with torch.no_grad():
    decode_one_tokens = torch.compile(decode_one_tokens, mode="reduce-overhead", fullgraph=True)
    cache_position = torch.tensor([seq_length + 1], device=torch_device)
    for _ in range(1, NUM_TOKENS_TO_GENERATE):
-        with sdpa_kernel(SDPBackend.MATH):
+        with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):
            next_token = decode_one_tokens(model, next_token.clone(), None, cache_position, past_key_values)
            generated_ids[:, cache_position] = next_token.int()
        cache_position += 1
@ -455,11 +453,10 @@ Scaled dot product attention (SDPA) is automatically enabled in PyTorch 2.0 and
 > [!TIP]
 > SDPA supports FlashAttention-2 as long as you have the latest PyTorch version installed.

-Use the [torch.nn.attention.sdpa_kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html) context manager to explicitly enable or disable any of the four attention algorithms. For example, use `SDPBackend.FLASH_ATTENTION` to enable FlashAttention.
+Use the [torch.backends.cuda.sdp_kernel](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention.html) context manager to explicitly enable or disable any of the three attention algorithms. For example, set `enable_flash=True` to enable FlashAttention.

 ```py
 import torch
-from torch.nn.attention import SDPBackend, sdpa_kernel
 from transformers import AutoModelForCausalLM

 model = AutoModelForCausalLM.from_pretrained(
@ -467,7 +464,7 @@ model = AutoModelForCausalLM.from_pretrained(
    torch_dtype=torch.bfloat16,
 )

-with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
+with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False):
    outputs = model.generate(**inputs)
 ```

@ -476,7 +473,7 @@ with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
 Quantization reduces the size of the LLM weights by storing them in a lower precision. This translates to lower memory usage and makes loading LLMs for inference more accessible if you're constrained by your GPUs memory. If you aren't limited by your GPU, you don't necessarily need to quantize your model because it can incur a small latency cost (except for AWQ and fused AWQ modules) due to the extra step required to quantize and dequantize the weights.

 > [!TIP]
-> There are many quantization libraries (see the [Quantization](./quantization) guide for more details) available, such as Quanto, AQLM, VPTQ, AWQ, and AutoGPTQ. Feel free to try them out and see which one works best for your use case. We also recommend reading the [Overview of natively supported quantization schemes in 🤗 Transformers](https://hf.co/blog/overview-quantization-transformers) blog post which compares AutoGPTQ and bitsandbytes.
+> There are many quantization libraries (see the [Quantization](./quantization) guide for more details) available, such as Quanto, AQLM, AWQ, and AutoGPTQ. Feel free to try them out and see which one works best for your use case. We also recommend reading the [Overview of natively supported quantization schemes in 🤗 Transformers](https://hf.co/blog/overview-quantization-transformers) blog post which compares AutoGPTQ and bitsandbytes.

 Use the Model Memory Calculator below to estimate and compare how much memory is required to load a model. For example, try estimating how much memory it costs to load [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1).

--- a/docs/source/en/llm_tutorial.md
+++ b/docs/source/en/llm_tutorial.md
@ -23,12 +23,6 @@ LLMs, or Large Language Models, are the key component behind text generation. In

 Autoregressive generation is the inference-time procedure of iteratively calling a model with its own generated outputs, given a few initial inputs. In 🤗 Transformers, this is handled by the [`~generation.GenerationMixin.generate`] method, which is available to all models with generative capabilities.

-<Tip>
-
-If you want to jump straight to chatting with a model, [try our chat CLI](quicktour#chat-with-text-generation-models).
-
-</Tip>
-
 This tutorial will show you how to:

 * Generate text with an LLM
@ -40,7 +34,6 @@ Before you begin, make sure you have all the necessary libraries installed:
 ```bash
 pip install transformers bitsandbytes>=0.39.0 -q
 ```
-Bitsandbytes supports multiple backends in addition to CUDA-based GPUs. Refer to the multi-backend installation [guide](https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend) to learn more.


 ## Generate text
@ -102,11 +95,9 @@ Next, you need to preprocess your text input with a [tokenizer](tokenizer_summar

 ```py
 >>> from transformers import AutoTokenizer
->>> from accelerate.test_utils.testing import get_backend

->>> DEVICE, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
 >>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", padding_side="left")
->>> model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to(DEVICE)
+>>> model_inputs = tokenizer(["A list of colors: red, blue"], return_tensors="pt").to("cuda")
 ```

 The `model_inputs` variable holds the tokenized text input, as well as the attention mask. While [`~generation.GenerationMixin.generate`] does its best effort to infer the attention mask when it is not passed, we recommend passing it whenever possible for optimal results.
@ -125,7 +116,7 @@ Finally, you don't need to do it one sequence at a time! You can batch your inpu
 >>> tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default
 >>> model_inputs = tokenizer(
 ...     ["A list of colors: red, blue", "Portugal is"], return_tensors="pt", padding=True
-... ).to(DEVICE)
+... ).to("cuda")
 >>> generated_ids = model.generate(**model_inputs)
 >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
 ['A list of colors: red, blue, green, yellow, orange, purple, pink,',
@ -155,7 +146,7 @@ If not specified in the [`~generation.GenerationConfig`] file, `generate` return


 ```py
->>> model_inputs = tokenizer(["A sequence of numbers: 1, 2"], return_tensors="pt").to(DEVICE)
+>>> model_inputs = tokenizer(["A sequence of numbers: 1, 2"], return_tensors="pt").to("cuda")

 >>> # By default, the output will contain up to 20 tokens
 >>> generated_ids = model.generate(**model_inputs)
@ -177,7 +168,7 @@ By default, and unless specified in the [`~generation.GenerationConfig`] file, `
 >>> from transformers import set_seed
 >>> set_seed(42)

->>> model_inputs = tokenizer(["I am a cat."], return_tensors="pt").to(DEVICE)
+>>> model_inputs = tokenizer(["I am a cat."], return_tensors="pt").to("cuda")

 >>> # LLM + greedy decoding = repetitive, boring output
 >>> generated_ids = model.generate(**model_inputs)
@ -199,7 +190,7 @@ LLMs are [decoder-only](https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt
 >>> # which is shorter, has padding on the right side. Generation fails to capture the logic.
 >>> model_inputs = tokenizer(
 ...     ["1, 2, 3", "A, B, C, D, E"], padding=True, return_tensors="pt"
-... ).to(DEVICE)
+... ).to("cuda")
 >>> generated_ids = model.generate(**model_inputs)
 >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 '1, 2, 33333333333'
@ -209,7 +200,7 @@ LLMs are [decoder-only](https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt
 >>> tokenizer.pad_token = tokenizer.eos_token  # Most LLMs don't have a pad token by default
 >>> model_inputs = tokenizer(
 ...     ["1, 2, 3", "A, B, C, D, E"], padding=True, return_tensors="pt"
-... ).to(DEVICE)
+... ).to("cuda")
 >>> generated_ids = model.generate(**model_inputs)
 >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 '1, 2, 3, 4, 5, 6,'
@ -226,7 +217,7 @@ Some models and tasks expect a certain input prompt format to work properly. Whe
 ... )
 >>> set_seed(0)
 >>> prompt = """How many helicopters can a human eat in one sitting? Reply as a thug."""
->>> model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
+>>> model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
 >>> input_length = model_inputs.input_ids.shape[1]
 >>> generated_ids = model.generate(**model_inputs, max_new_tokens=20)
 >>> print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
@ -242,7 +233,7 @@ Some models and tasks expect a certain input prompt format to work properly. Whe
 ...     },
 ...     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ... ]
->>> model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
+>>> model_inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
 >>> input_length = model_inputs.shape[1]
 >>> generated_ids = model.generate(model_inputs, do_sample=True, max_new_tokens=20)
 >>> print(tokenizer.batch_decode(generated_ids[:, input_length:], skip_special_tokens=True)[0])
@ -274,9 +265,8 @@ While the autoregressive generation process is relatively straightforward, makin

 ### Related libraries

-1. [`optimum`](https://github.com/huggingface/optimum), an extension of 🤗 Transformers that optimizes for specific hardware devices;
+1. [`optimum`](https://github.com/huggingface/optimum), an extension of 🤗 Transformers that optimizes for specific hardware devices.
 2. [`outlines`](https://github.com/outlines-dev/outlines), a library where you can constrain text generation (e.g. to generate JSON files);
-3. [`SynCode`](https://github.com/uiuc-focal-lab/syncode), a library for context-free grammar guided generation (e.g. JSON, SQL, Python);
+3. [`SynCode`](https://github.com/uiuc-focal-lab/syncode), a library for context-free grammar guided generation. (e.g. JSON, SQL, Python)
 4. [`text-generation-inference`](https://github.com/huggingface/text-generation-inference), a production-ready server for LLMs;
 5. [`text-generation-webui`](https://github.com/oobabooga/text-generation-webui), a UI for text generation;
-6. [`logits-processor-zoo`](https://github.com/NVIDIA/logits-processor-zoo), containing additional options to control text generation with 🤗 Transformers. See our related [blog post](https://huggingface.co/blog/logits-processor-zoo).
--- a/docs/source/en/main_classes/data_collator.md
+++ b/docs/source/en/main_classes/data_collator.md
@ -71,6 +71,3 @@ Examples of use can be found in the [example scripts](../examples) or [example n

 [[autodoc]] data.data_collator.DataCollatorWithFlattening

-# DataCollatorForMultipleChoice
-
-[[autodoc]] data.data_collator.DataCollatorForMultipleChoice
--- a/docs/source/en/main_classes/deepspeed.md
+++ b/docs/source/en/main_classes/deepspeed.md
@ -16,7 +16,7 @@ rendered properly in your Markdown viewer.

 # DeepSpeed

-[DeepSpeed](https://github.com/deepspeedai/DeepSpeed), powered by Zero Redundancy Optimizer (ZeRO), is an optimization library for training and fitting very large models onto a GPU. It is available in several ZeRO stages, where each stage progressively saves more GPU memory by partitioning the optimizer state, gradients, parameters, and enabling offloading to a CPU or NVMe. DeepSpeed is integrated with the [`Trainer`] class and most of the setup is automatically taken care of for you. 
+[DeepSpeed](https://github.com/microsoft/DeepSpeed), powered by Zero Redundancy Optimizer (ZeRO), is an optimization library for training and fitting very large models onto a GPU. It is available in several ZeRO stages, where each stage progressively saves more GPU memory by partitioning the optimizer state, gradients, parameters, and enabling offloading to a CPU or NVMe. DeepSpeed is integrated with the [`Trainer`] class and most of the setup is automatically taken care of for you. 

 However, if you want to use DeepSpeed without the [`Trainer`], Transformers provides a [`HfDeepSpeedConfig`] class.

--- a/docs/source/en/main_classes/image_processor.md
+++ b/docs/source/en/main_classes/image_processor.md
@ -27,7 +27,6 @@ from transformers import AutoImageProcessor

 processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50", use_fast=True)
 ```
-Note that `use_fast` will be set to `True` by default in a future release.

 When using a fast image processor, you can also set the `device` argument to specify the device on which the processing should be done. By default, the processing is done on the same device as the inputs if the inputs are tensors, or on the CPU otherwise.

@ -43,17 +42,21 @@ images_processed = processor(images, return_tensors="pt", device="cuda")
 Here are some speed comparisons between the base and fast image processors for the `DETR` and `RT-DETR` models, and how they impact overall inference time:

 <div class="flex">
-  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png" />
-</div>
-<div class="flex">
-  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png" />
+  <div>
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png" />
+  </div>
+  <div>
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png" />
+  </div>
 </div>

 <div class="flex">
-  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png" />
-</div>
-<div class="flex">
-  <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png" />
+  <div>
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png" />
+  </div>
+  <div>
+    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png" />
+  </div>
 </div>

 These benchmarks were run on an [AWS EC2 g5.2xlarge instance](https://aws.amazon.com/ec2/instance-types/g5/), utilizing an NVIDIA A10G Tensor Core GPU.
--- a/docs/source/en/main_classes/quantization.md
+++ b/docs/source/en/main_classes/quantization.md
@ -34,10 +34,6 @@ Learn how to quantize models in the [Quantization](../quantization) guide.

 [[autodoc]] AqlmConfig

-## VptqConfig
-
-[[autodoc]] VptqConfig
-
 ## AwqConfig

 [[autodoc]] AwqConfig
@ -57,10 +53,6 @@ Learn how to quantize models in the [Quantization](../quantization) guide.

 [[autodoc]] quantizers.base.HfQuantizer

-## HiggsConfig
-
-[[autodoc]] HiggsConfig
-
 ## HqqConfig

 [[autodoc]] HqqConfig
@ -80,11 +72,3 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
 ## BitNetConfig

 [[autodoc]] BitNetConfig
-
-## SpQRConfig
-
-[[autodoc]] SpQRConfig
-
-## FineGrainedFP8Config
-
-[[autodoc]] FineGrainedFP8Config
--- a/docs/source/en/model_doc/aria.md
+++ b/docs/source/en/model_doc/aria.md
@ -1,106 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Aria
-
-## Overview
-
-The Aria model was proposed in [Aria: An Open Multimodal Native Mixture-of-Experts Model](https://huggingface.co/papers/2410.05993) by Li et al. from the Rhymes.AI team.
-
-Aria is an open multimodal-native model with best-in-class performance across a wide range of multimodal, language, and coding tasks. It has a Mixture-of-Experts architecture, with respectively 3.9B and 3.5B activated parameters per visual token and text token. 
-
-The abstract from the paper is the following:
-
-*Information comes in diverse modalities. Multimodal native AI models are essential to integrate real-world information and deliver comprehensive understanding. While proprietary multimodal native models exist, their lack of openness imposes obstacles for adoptions, let alone adaptations. To fill this gap, we introduce Aria, an open multimodal native model with best-in-class performance across a wide range of multimodal, language, and coding tasks. Aria is a mixture-of-expert model with 3.9B and 3.5B activated parameters per visual token and text token, respectively. It outperforms Pixtral-12B and Llama3.2-11B, and is competitive against the best proprietary models on various multimodal tasks. We pre-train Aria from scratch following a 4-stage pipeline, which progressively equips the model with strong capabilities in language understanding, multimodal understanding, long context window, and instruction following. We open-source the model weights along with a codebase that facilitates easy adoptions and adaptations of Aria in real-world applications.*
-
-This model was contributed by [m-ric](https://huggingface.co/m-ric).
-The original code can be found [here](https://github.com/rhymes-ai/Aria).
-
-## Usage tips
-
-Here's how to use the model for vision tasks:
-```python
-import requests
-import torch
-from PIL import Image
-
-from transformers import AriaProcessor, AriaForConditionalGeneration
-
-model_id_or_path = "rhymes-ai/Aria"
-
-model = AriaForConditionalGeneration.from_pretrained(
-    model_id_or_path, device_map="auto"
-)
-
-processor = AriaProcessor.from_pretrained(model_id_or_path)
-
-image = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
-
-messages = [
-    {
-        "role": "user",
-        "content": [
-            {"type": "image"},
-            {"text": "what is the image?", "type": "text"},
-        ],
-    }
-]
-
-text = processor.apply_chat_template(messages, add_generation_prompt=True)
-inputs = processor(text=text, images=image, return_tensors="pt")
-inputs.to(model.device)
-
-output = model.generate(
-    **inputs,
-    max_new_tokens=15,
-    stop_strings=["<|im_end|>"],
-    tokenizer=processor.tokenizer,
-    do_sample=True,
-    temperature=0.9,
-)
-output_ids = output[0][inputs["input_ids"].shape[1]:]
-response = processor.decode(output_ids, skip_special_tokens=True)
-```
-
-
-## AriaImageProcessor
-
-[[autodoc]] AriaImageProcessor
-
-## AriaProcessor
-
-[[autodoc]] AriaProcessor
-
-## AriaTextConfig
-
-[[autodoc]] AriaTextConfig
-
-## AriaConfig
-
-[[autodoc]] AriaConfig
-
-## AriaTextModel
-
-[[autodoc]] AriaTextModel
-
-## AriaTextForCausalLM
-
-[[autodoc]] AriaTextForCausalLM
-
-## AriaForConditionalGeneration
-
-[[autodoc]] AriaForConditionalGeneration
-    - forward
--- a/docs/source/en/model_doc/bamba.md
+++ b/docs/source/en/model_doc/bamba.md
@ -1,64 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# Bamba
-
-
-## Overview
-
-Bamba-9B is a decoder-only language model based on the [Mamba-2](https://github.com/state-spaces/mamba) architecture and is designed to handle a wide range of text generation tasks. It is trained from scratch using a two-stage training approach. In the first stage, the model is trained on 2 trillion tokens from the Dolma v1.7 dataset. In the second stage, it undergoes additional training on 200 billion tokens, leveraging a carefully curated blend of high-quality data to further refine its performance and enhance output quality.
-
-Checkout all Bamba-9B model checkpoints [here](https://github.com/foundation-model-stack/bamba).
-
-## BambaConfig
-
-| Model            | Params       | # Layers | Hidden Dim. | Attention Heads | GQA | KV Heads | Context Length |  Tied Embeddings |
-|-------------------|--------------|----------|-------------|-----------------|-----|----------|----------------|------------------|
-| Bamba  | 9B (9.78B)   | 32       | 4096        | 32              | Yes | 8        | 4096           | True |
-
-[[autodoc]] BambaConfig
-
-<!---
-## Usage Tips
-
-Tips: 
-
- The architecture is based on Mamba-2 models.
-
-## BambaModel
-
-[[autodoc]] BambaModel
-    - forward
-->
-
-## BambaForCausalLM
-
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-model = AutoModelForCausalLM.from_pretrained("ibm-fms/Bamba-9B")
-tokenizer = AutoTokenizer.from_pretrained("ibm-fms/Bamba-9B")
-
-message = ["Mamba is a snake with following properties  "]
-inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
-response = model.generate(**inputs, max_new_tokens=64)
-print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
-```
-
-[[autodoc]] BambaForCausalLM
-    - forward
-
-This HF implementation is contributed by [ani300](https://github.com/ani300) and [fabianlim](https://github.com/fabianlim). 
--- a/docs/source/en/model_doc/beit.md
+++ b/docs/source/en/model_doc/beit.md
@ -71,43 +71,6 @@ alt="drawing" width="600"/>

 <small> BEiT pre-training. Taken from the <a href="https://arxiv.org/abs/2106.08254">original paper.</a> </small>

-### Using Scaled Dot Product Attention (SDPA)
-
-PyTorch includes a native scaled dot-product attention (SDPA) operator as part of `torch.nn.functional`. This function 
-encompasses several implementations that can be applied depending on the inputs and the hardware in use. See the 
-[official documentation](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html) 
-or the [GPU Inference](https://huggingface.co/docs/transformers/main/en/perf_infer_gpu_one#pytorch-scaled-dot-product-attention)
-page for more information.
-
-SDPA is used by default for `torch>=2.1.1` when an implementation is available, but you may also set 
-`attn_implementation="sdpa"` in `from_pretrained()` to explicitly request SDPA to be used.
-
-```
-from transformers import BeitForImageClassification
-model = BeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224", attn_implementation="sdpa", torch_dtype=torch.float16)
-...
-```
-
-For the best speedups, we recommend loading the model in half-precision (e.g. `torch.float16` or `torch.bfloat16`).
-
-On a local benchmark (NVIDIA GeForce RTX 2060-8GB, PyTorch 2.5.1, OS Ubuntu 20.04) with `float16` and 
-`microsoft/beit-base-patch16-224` model, we saw the following improvements during training and inference:
-
-#### Training
-
-| num_training_steps | batch_size | image_size   | is_cuda | Time per batch (eager - s) | Time per batch (sdpa - s) | Speedup (%) | Eager peak mem (MB) | SDPA peak mem (MB) | Mem saving (%) |
-|--------------------|------------|--------------|---------|----------------------------|---------------------------|-------------|----------------------|--------------------|----------------|
-| 50                 | 2          | (1048, 640)  | True    | 0.984                      | 0.746                     | 31.975      | 6738.915            | 4319.886          | 55.998         |
-
-#### Inference
-
-|   Image batch size |   Eager (s/iter) | Eager CI, %   |   Eager memory (MB) |   SDPA (s/iter) | SDPA CI, %   |   SDPA memory (MB) |   SDPA speedup | SDPA memory saved (%) |
-|-------------------:|-----------------:|:--------------|--------------------:|----------------:|:-------------|-------------------:|---------------:|----------------------:|
-|                  1 |            0.012 | ±0.3%         |         3.76657e+08 |           0.011 | ±0.5%        |        3.75739e+08 |          1.05  |                 0.244 |
-|                  4 |            0.013 | ±0.1%         |         4.03147e+08 |           0.011 | ±0.2%        |        3.90554e+08 |          1.178 |                 3.225 |
-|                 16 |            0.045 | ±0.1%         |         4.96697e+08 |           0.035 | ±0.1%        |        4.51232e+08 |          1.304 |                10.076 |
-|                 32 |            0.088 | ±0.1%         |         6.24417e+08 |           0.066 | ±0.1%        |        5.33488e+08 |          1.325 |                17.044 |
-
 ## Resources

 A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with BEiT.
--- a/docs/source/en/model_doc/blip.md
+++ b/docs/source/en/model_doc/blip.md
@ -61,11 +61,6 @@ The original code can be found [here](https://github.com/salesforce/BLIP).
 [[autodoc]] BlipImageProcessor
    - preprocess

-## BlipImageProcessorFast
-
-[[autodoc]] BlipImageProcessorFast
-    - preprocess
-
 <frameworkcontent>
 <pt>

--- a/docs/source/en/model_doc/clip.md
+++ b/docs/source/en/model_doc/clip.md
@ -251,11 +251,6 @@ The resource should ideally demonstrate something new instead of duplicating an
 [[autodoc]] CLIPImageProcessor
    - preprocess

-## CLIPImageProcessorFast
-
-[[autodoc]] CLIPImageProcessorFast
-    - preprocess
-
 ## CLIPFeatureExtractor

 [[autodoc]] CLIPFeatureExtractor
--- a/docs/source/en/model_doc/cohere2.md
+++ b/docs/source/en/model_doc/cohere2.md
@ -1,51 +0,0 @@
-# Cohere
-
-## Overview
-[C4AI Command R7B](https://cohere.com/blog/command-r7b) is an open weights research release of a 7B billion parameter model developed by Cohere and Cohere For AI. It has advanced capabilities optimized for various use cases, including reasoning, summarization, question answering, and code. The model is trained to perform sophisticated tasks including Retrieval Augmented Generation (RAG) and tool use. The model also has powerful agentic capabilities that can use and combine multiple tools over multiple steps to accomplish more difficult tasks. It obtains top performance on enterprise-relevant code use cases. C4AI Command R7B is a multilingual model trained on 23 languages.
-
-The model features three layers with sliding window attention (window size 4096) and ROPE for efficient local context modeling and relative positional encoding. A fourth layer uses global attention without positional embeddings, enabling unrestricted token interactions across the entire sequence.
-
-The model has been trained on 23 languages: English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Arabic, Chinese, Russian, Polish, Turkish, Vietnamese, Dutch, Czech, Indonesian, Ukrainian, Romanian, Greek, Hindi, Hebrew, and Persian.
-
-## Usage tips
-The model and tokenizer can be loaded via:
-
-```python
-# pip install transformers
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-model_id = "CohereForAI/c4ai-command-r7b-12-2024"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-
-# Format message with the command-r chat template
-messages = [{"role": "user", "content": "Hello, how are you?"}]
-input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
-
-gen_tokens = model.generate(
-    input_ids,
-    max_new_tokens=100,
-    do_sample=True,
-    temperature=0.3,
-)
-
-gen_text = tokenizer.decode(gen_tokens[0])
-print(gen_text)
-```
-
-## Cohere2Config
-
-[[autodoc]] Cohere2Config
-
-## Cohere2Model
-
-[[autodoc]] Cohere2Model
-    - forward
-
-
-## Cohere2ForCausalLM
-
-[[autodoc]] Cohere2ForCausalLM
-    - forward
-
-
--- a/docs/source/en/model_doc/colpali.md
+++ b/docs/source/en/model_doc/colpali.md
@ -1,90 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# ColPali
-
-## Overview
-
-The *ColPali* model was proposed in [ColPali: Efficient Document Retrieval with Vision Language Models](https://doi.org/10.48550/arXiv.2407.01449) by **Manuel Faysse***, **Hugues Sibille***, **Tony Wu***, Bilel Omrani, Gautier Viaud, Céline Hudelot, Pierre Colombo (* denotes equal contribution). Work lead by ILLUIN Technology.
-
-In our proposed *ColPali* approach, we leverage VLMs to construct efficient multi-vector embeddings directly from document images (“screenshots”) for document retrieval. We train the model to maximize the similarity between these document embeddings and the corresponding query embeddings, using the late interaction method introduced in ColBERT.
-
-Using *ColPali* removes the need for potentially complex and brittle layout recognition and OCR pipelines with a single model that can take into account both the textual and visual content (layout, charts, etc.) of a document.
-
-## Resources
-
- The *ColPali* arXiv paper can be found [here](https://doi.org/10.48550/arXiv.2407.01449). 📄
- The official blog post detailing ColPali can be found [here](https://huggingface.co/blog/manu/colpali). 📝
- The original model implementation code for the ColPali model and for the `colpali-engine` package can be found [here](https://github.com/illuin-tech/colpali). 🌎
- Cookbooks for learning to use the transformers-native version of *ColPali*, fine-tuning, and similarity maps generation can be found [here](https://github.com/tonywu71/colpali-cookbooks). 📚
-
-This model was contributed by [@tonywu71](https://huggingface.co/tonywu71) and [@yonigozlan](https://huggingface.co/yonigozlan).
-
-## Usage
-
-This example demonstrates how to use *ColPali* to embed both queries and images, calculate their similarity scores, and identify the most relevant matches. For a specific query, you can retrieve the top-k most similar images by selecting the ones with the highest similarity scores.
-
-```python
-import torch
-from PIL import Image
-
-from transformers import ColPaliForRetrieval, ColPaliProcessor
-
-model_name = "vidore/colpali-v1.2-hf"
-
-model = ColPaliForRetrieval.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    device_map="cuda:0",  # or "mps" if on Apple Silicon
-).eval()
-
-processor = ColPaliProcessor.from_pretrained(model_name)
-
-# Your inputs (replace dummy images with screenshots of your documents)
-images = [
-    Image.new("RGB", (32, 32), color="white"),
-    Image.new("RGB", (16, 16), color="black"),
-]
-queries = [
-    "What is the organizational structure for our R&D department?",
-    "Can you provide a breakdown of last year’s financial performance?",
-]
-
-# Process the inputs
-batch_images = processor(images=images).to(model.device)
-batch_queries = processor(text=queries).to(model.device)
-
-# Forward pass
-with torch.no_grad():
-    image_embeddings = model(**batch_images).embeddings
-    query_embeddings = model(**batch_queries).embeddings
-
-# Score the queries against the images
-scores = processor.score_retrieval(query_embeddings, image_embeddings)
-```
-
-## ColPaliConfig
-
-[[autodoc]] ColPaliConfig
-
-## ColPaliProcessor
-
-[[autodoc]] ColPaliProcessor
-
-## ColPaliForRetrieval
-
-[[autodoc]] ColPaliForRetrieval
-    - forward
--- a/docs/source/en/model_doc/convnext.md
+++ b/docs/source/en/model_doc/convnext.md
@ -64,11 +64,6 @@ If you're interested in submitting a resource to be included here, please feel f
 [[autodoc]] ConvNextImageProcessor
    - preprocess

-## ConvNextImageProcessorFast
-
-[[autodoc]] ConvNextImageProcessorFast
-    - preprocess
-
 <frameworkcontent>
 <pt>

--- a/docs/source/en/model_doc/dab-detr.md
+++ b/docs/source/en/model_doc/dab-detr.md
@ -1,119 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
-->
-
-# DAB-DETR
-
-## Overview
-
-The DAB-DETR model was proposed in [DAB-DETR: Dynamic Anchor Boxes are Better Queries for DETR](https://arxiv.org/abs/2201.12329) by Shilong Liu, Feng Li, Hao Zhang, Xiao Yang, Xianbiao Qi, Hang Su, Jun Zhu, Lei Zhang.
-DAB-DETR is an enhanced variant of Conditional DETR. It utilizes dynamically updated anchor boxes to provide both a reference query point (x, y) and a reference anchor size (w, h), improving cross-attention computation. This new approach achieves 45.7% AP when trained for 50 epochs with a single ResNet-50 model as the backbone.
-
-<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/dab_detr_convergence_plot.png"
-alt="drawing" width="600"/>
-
-The abstract from the paper is the following:
-
-*We present in this paper a novel query formulation using dynamic anchor boxes
-for DETR (DEtection TRansformer) and offer a deeper understanding of the role
-of queries in DETR. This new formulation directly uses box coordinates as queries
-in Transformer decoders and dynamically updates them layer-by-layer. Using box
-coordinates not only helps using explicit positional priors to improve the query-to-feature similarity and eliminate the slow training convergence issue in DETR,
-but also allows us to modulate the positional attention map using the box width
-and height information. Such a design makes it clear that queries in DETR can be
-implemented as performing soft ROI pooling layer-by-layer in a cascade manner.
-As a result, it leads to the best performance on MS-COCO benchmark among
-the DETR-like detection models under the same setting, e.g., AP 45.7% using
-ResNet50-DC5 as backbone trained in 50 epochs. We also conducted extensive
-experiments to confirm our analysis and verify the effectiveness of our methods.*
-
-This model was contributed by [davidhajdu](https://huggingface.co/davidhajdu).
-The original code can be found [here](https://github.com/IDEA-Research/DAB-DETR).
-
-## How to Get Started with the Model
-
-Use the code below to get started with the model.
-
-```python
-import torch
-import requests
-
-from PIL import Image
-from transformers import AutoModelForObjectDetection, AutoImageProcessor
-
-url = 'http://images.cocodataset.org/val2017/000000039769.jpg' 
-image = Image.open(requests.get(url, stream=True).raw)
-
-image_processor = AutoImageProcessor.from_pretrained("IDEA-Research/dab-detr-resnet-50")
-model = AutoModelForObjectDetection.from_pretrained("IDEA-Research/dab-detr-resnet-50")
-
-inputs = image_processor(images=image, return_tensors="pt")
-
-with torch.no_grad():
-    outputs = model(**inputs)
-
-results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3)
-
-for result in results:
-    for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):
-        score, label = score.item(), label_id.item()
-        box = [round(i, 2) for i in box.tolist()]
-        print(f"{model.config.id2label[label]}: {score:.2f} {box}")
-```
-This should output
-```
-cat: 0.87 [14.7, 49.39, 320.52, 469.28]
-remote: 0.86 [41.08, 72.37, 173.39, 117.2]
-cat: 0.86 [344.45, 19.43, 639.85, 367.86]
-remote: 0.61 [334.27, 75.93, 367.92, 188.81]
-couch: 0.59 [-0.04, 1.34, 639.9, 477.09]
-```
-
-There are three other ways to instantiate a DAB-DETR model (depending on what you prefer):
-
-Option 1: Instantiate DAB-DETR with pre-trained weights for entire model
-```py
->>> from transformers import DabDetrForObjectDetection
-
->>> model = DabDetrForObjectDetection.from_pretrained("IDEA-Research/dab-detr-resnet-50")
-```
-
-Option 2: Instantiate DAB-DETR with randomly initialized weights for Transformer, but pre-trained weights for backbone
-```py
->>> from transformers import DabDetrConfig, DabDetrForObjectDetection
-
->>> config = DabDetrConfig()
->>> model = DabDetrForObjectDetection(config)
-```
-Option 3: Instantiate DAB-DETR with randomly initialized weights for backbone + Transformer
-```py
->>> config = DabDetrConfig(use_pretrained_backbone=False)
->>> model = DabDetrForObjectDetection(config)
-```
-
-
-## DabDetrConfig
-
-[[autodoc]] DabDetrConfig
-
-## DabDetrModel
-
-[[autodoc]] DabDetrModel
-    - forward
-
-## DabDetrForObjectDetection
-
-[[autodoc]] DabDetrForObjectDetection
-    - forward
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Matt	f3ff530a45	[run-slow] pixtral	2024-12-05 18:45:21 +00:00
Matt	6769700a16	Correct nesting in test	2024-12-05 18:41:21 +00:00
Matt	af9f67c9d9	Correct nesting in test	2024-12-05 18:40:35 +00:00
Matt	3406432db3	More error handling	2024-12-05 18:36:06 +00:00
Matt	031fdd5e10	make fixup	2024-12-05 18:32:42 +00:00
Matt	ed0b4303e3	Fix the structure of images output by the processor	2024-12-05 18:31:08 +00:00
Matt	49055e150d	Fix the structure of images output by the processor	2024-12-05 18:19:10 +00:00