Compare commits

...

703 Commits

Author SHA1 Message Date
ca38d640b7 fix 2025-10-17 22:21:10 +02:00
b8011a3dc5 fix 2025-10-17 21:30:29 +02:00
a118c8e1c4 fix 2025-10-17 20:59:00 +02:00
0109c42409 fix 2025-10-17 20:30:23 +02:00
3c7552f733 fix 2025-10-17 15:40:54 +02:00
4757bf062b fix 2025-10-17 15:12:12 +02:00
aceaa7ce97 fix 2025-10-17 15:05:09 +02:00
c9293376a0 fix 2025-10-17 12:04:50 +02:00
e69d3ca150 check 1 2025-10-17 10:44:24 +02:00
bffad7f4fb check 1 2025-10-17 09:21:09 +02:00
740f952218 check 1 2025-10-17 06:57:10 +02:00
950c4e5303 check 1 2025-10-17 06:28:55 +02:00
89970f4797 check 1 2025-10-17 03:03:25 +02:00
a4a46e62a5 check 1 2025-10-16 21:32:04 +02:00
9b36498d5f 1 2025-10-16 21:16:53 +02:00
eefbf4ac8b 🌐 [i18n-KO] Translated llama4.md to Korean (#40396)
* docs: ko: llama4.md

* feat: nmt draft

* fix: manual edits

* Update docs/source/ko/model_doc/llama4.md

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>

* Update docs/source/ko/model_doc/llama4.md

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>

* Update docs/source/ko/model_doc/llama4.md

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>

* Update docs/source/ko/model_doc/llama4.md

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>

---------

Co-authored-by: TaskerJang <bymyself103@naver.com>
Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>
2025-10-16 11:28:27 -07:00
50ca781d78 🌐 [i18n-KO] Translated code_llama.md to Korean (#40558)
* docs: ko: code_llama.md

* feat: nmt draft

* fix: manual edits

* Apply suggestions from code review

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>
Co-authored-by: HyunZ118 <156191095+HyunZ118@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>

---------

Co-authored-by: Harheem Kim <49297157+harheem@users.noreply.github.com>
Co-authored-by: HyunZ118 <156191095+HyunZ118@users.noreply.github.com>
2025-10-16 11:27:46 -07:00
8739fc05c4 [i18n-KO] Translated big_bird.md to Korean (#40445)
* docs: ko: BigBird.md

* feat: nmt draft

* fix: manual edits
2025-10-16 11:23:56 -07:00
77b5ad65ee 🌐 [i18n-KO] Translated sam_hq.md to Korean (#41340)
* fix: manual edits

* Apply suggestions from code review

Apply suggestions from code review

Co-authored-by: HyunSang Jang <tasker.dev103@gmail.com>

* Apply suggestions from code review

Apply suggestions from code review

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>

---------

Co-authored-by: HyunSang Jang <tasker.dev103@gmail.com>
Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>
2025-10-16 11:10:16 -07:00
a9731a725e 🌐 [i18n-KO] Translated chat_extras.md to Korean (#39863)
* docs: ko: chat_extras.md

* feat: nmt draft

* fix: manual edits

* Apply suggestions from code review

* Apply suggestions from code review

* Update docs/source/ko/chat_extras.md
2025-10-16 10:41:03 -07:00
bdbc2d037b [Trainer] [Breaking change] use_cache default to False (#41585)
* use_cache default to `False` when training

* style

* Fix comment

* add checks

* style

* set

* switch
2025-10-16 18:51:36 +02:00
fe11cbb808 Erroring when KernelConfig is passed without use_kernels = True (#41657)
* update

* update
2025-10-16 18:08:46 +02:00
6344371a91 improve utils/check_bad_commit.py (#41658)
* robust

* robust

* robust

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-16 15:51:19 +00:00
a408384a88 Improve package version check (#41661)
fix
2025-10-16 17:31:58 +02:00
f7c33abab3 Small changes to benchmarking script (#41662) 2025-10-16 17:25:49 +02:00
9839d57a02 Fix serving continuous batching (#41624)
* update-serving-cb

* style

* style

* check none

* Apply suggestions from code review

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2025-10-16 17:24:21 +02:00
e85d5ab2bb Fix dtype casting with quantization (#41665)
fix dtype casting
2025-10-16 17:19:32 +02:00
1c36d407d5 Add in-out modalities as class attribute per model (#41366)
* update all models

* fix copies

* explanation comment

* better notation in omni model

* style

* fix copies

* output_modalities under generation mixin

* fix copies

* oh, glm4v also needs conversion
2025-10-16 17:11:06 +02:00
0215846d98 Switch to CB if cache_implementation == paged (#41655)
* Add a switch to CB in case of paged cache

* Added paged as a valid cache implem

* Added a fallback on inputs_ids as a name

* Rookie mistake

* Removed paged from cache implems

* Added warning about some beam search args

* Moved up CB warning
2025-10-16 17:00:18 +02:00
9e99198e5e Use | for Optional and Union typing (#41646)
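Illustrative sketch (not from the PR) of the PEP 604 style this change standardizes on:

```python
from __future__ import annotations  # keeps the syntax valid on older Python versions

# Before: Optional[int] and Union[str, int]; after: the `|` union syntax.
def resize(size: int | None = None, interpolation: str | int = "bilinear") -> None:
    ...
```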
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-16 14:29:54 +00:00
bf815e9b5e [Masks] Fix mask handling in eager for vision models (#41625)
add mask handling in case of models that do use it
2025-10-16 16:27:26 +02:00
vb 4a43e3d57c purge HF_HUB_ENABLE_HF_TRANSFER; promote Xet (#41656) 2025-10-16 16:17:09 +02:00
8725ce10ed [Fix] Deepseek V3 expert bias routing (#41647)
* [Fix] Deepseek V3 expert bias routing

* [Fix] fix-copies

* [Fix] Run make style
2025-10-16 14:04:48 +00:00
1fb3fc4db0 [kernels] refactor function kernel calling (#41577)
* refactor function kernel callling

* nit

* don't pass the mapping

* use _kernels_available

* rm import
2025-10-16 15:43:02 +02:00
9176af574a Double router compute? (#41653)
* weird double router compute?

* flip it
2025-10-16 15:17:21 +02:00
503c933f36 Fix confusing cls assignment (#41642)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-16 13:01:07 +00:00
2aff20aff6 Fix typos in documentation (#41641)
Fix typos

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-16 12:58:46 +00:00
981370c038 Format MarkDown documentation and tiny fixes (#41638)
* Fix MarkDown syntax

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-16 12:58:06 +00:00
eef9fb2af3 Fix EncoderDecoder cache (#41612)
* Fix EncoderDecoder cache

* Add the option for the ddp data tuples to have 2 elems

* Modify the order of the KV and sliding

* Adapted RAG and Whisper to new EncoderDecoderCache

* A single comma

* Remove kwargs in map

* Fixed order in manual injection cache test

* Slight changes to support legacy format

* Removed Nones
2025-10-16 14:55:41 +02:00
35dc8f0a2e Adjust device logging level and add minor fixes (#41636)
This commit addresses a noisy warning and improves the robustness of the base pipeline implementation.

- The device placement message in the pipeline base class has been changed from a `warning` to a `debug` log. This reduces log noise for users who are aware of their device setup, while still providing the information for debugging purposes.

- Additionally, potential `UnboundLocalError` exceptions in the `_pad` and `check_model_type` functions have been prevented by initializing variables before their conditional assignment.
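
A minimal sketch, using a hypothetical helper, of the initialize-before-conditional pattern described above:

```python
def pad_lengths(items, pad_value=0):
    # Initialize up front so the later read can never raise UnboundLocalError,
    # even when the conditional branch does not run.
    max_length = 0
    if items:
        max_length = max(len(x) for x in items)
    return [x + [pad_value] * (max_length - len(x)) for x in items]

print(pad_lengths([[1], [1, 2, 3]]))  # [[1, 0, 0], [1, 2, 3]]
```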
2025-10-16 12:47:39 +00:00
2935a1be19 Fix fp32_ln for various models (#41605)
* Add is_causal to KosmosTextAttention

* Move get target_dtype to be imported elsewhere

* Fix fp32 flash attention bug in bark

* Fix is_causal in mllama

* Fix fp32 issue on StableLM

* Fix repo-consistency
2025-10-16 14:18:49 +02:00
b9bd8c45a1 [CI] Build translated docs (#41632)
fix
2025-10-16 14:01:33 +02:00
baecdb8a97 [Ernie 4.5 Moe] Fix Moe and offloading (#41385)
fix
2025-10-16 13:59:01 +02:00
44539827d5 [Executorch] Simplify for encoder models (#41627)
* Trigger Build

* revert extra treatment for executorch as we default to no vmapping now
2025-10-16 13:57:52 +02:00
143acfe2ce fix check inputs for text2text pipeline (#41556)
fix check inputs

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-10-16 11:42:41 +00:00
67fae90519 Fix FP-Quant quantization fallback CPU dispatch. (#41619)
* fp_quant fix

* Update quantizer_fp_quant.py
2025-10-16 11:41:01 +00:00
af2a66ced9 Migrate transformers cli to Typer (#41487)
* Add typer-slim as explicit dependency

* Migrate CLI to Typer

* code quality

* bump release candidate

* adapt test_cli.py

* Remove ./commands + adapt tests

* fix quality

* consistency

* doctested

* do not serve model in chat

* style

* will it fix them?

* fix test

* capitalize classes

* Rebase

* Rebase

* tests + fixup

tests + fixup

* custom error message

* fix ?

* should be good

* fix caplog globally

* inner caplog

* last attempt

* Retry

* Let's try with capsys disabled

---------

Co-authored-by: Lysandre <hi@lysand.re>
2025-10-16 13:29:42 +02:00
a59124e27e Add missing dates to docs (#41576)
add dates
2025-10-16 09:32:28 +00:00
81f97b17d2 Remove randomly added script (#41650)
remove
2025-10-16 11:23:53 +02:00
c0a5cf19ad Fix tokenization test (#41649)
fix
2025-10-16 11:14:20 +02:00
3ef6f2c415 Allow passing tp_plan in from_pretrained directly (#41435)
* start

* allow passing it

* fix plans

* fix

* fix

* style

* style

* fix

* add_test

* oupsi indent

* fix

* fix

* fix for CI without accelerator

* fix import
2025-10-16 11:12:07 +02:00
59efd86da2 Add aux loss for GLM-4.5V (#41564)
* add aux

* update

* update config to text_config

* use qwen data class to avoid repeat again

* format

* update

* use 1e-4

* update

* update for remove init

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Raushan Turganbay <raushan@huggingface.co>
2025-10-16 09:04:21 +00:00
7b7d17f9bf 🚨 [v5] Toggle the serialization format in processors (#41474)
* toggle the serialization

* prob this fixes it

* fix tests

* typo

* delete legacy save entirely

* remove extra nesting in if

* revert test and serialzie a public attr instead of private
2025-10-16 10:19:22 +02:00
e20df45bf6 Add Backbone API fine-tuning tutorial (#41590)
---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-15 18:42:32 +02:00
19df66dcba Update executorch.md (#41582)
* Update executorch.md

* Update executorch.md

* Update executorch.md

* Apply suggestions from code review

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-15 09:01:46 -07:00
9f71e3a604 [docs] Duplicate entry (#41591)
fix
2025-10-15 17:02:36 +02:00
bc9900562d Fix quantization base class (#41613)
* fix

* fix

---------

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-10-15 16:58:17 +02:00
72fd67929b Remove deprecated code (#41616)
remove

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-10-15 16:57:52 +02:00
da382917aa Remove the head masking block in some vision models (#41620)
* old

* new

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-15 15:51:01 +02:00
313afcc468 [chat template] update when "push_to_hub" (#39815)
* update templates push to hub

* revert jinja suffix and move it to processor file
2025-10-15 13:49:59 +00:00
7bba4d1202 Fix video processing channel format (#41603)
fix
2025-10-15 15:48:01 +02:00
ab92534377 enable sdpa enable gqa logic for Ascend NPU (#41601)
* enable gqa logic for Ascend NPU

* remove redundant comments

* fix comments about Ascend NPU
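
Rough sketch of the grouped-query SDPA dispatch this enables, using PyTorch's `enable_gqa` flag (assumes torch >= 2.5; shapes are illustrative):

```python
import torch
import torch.nn.functional as F

q = torch.randn(1, 8, 16, 64)  # 8 query heads
k = torch.randn(1, 2, 16, 64)  # 2 key/value heads shared across query groups
v = torch.randn(1, 2, 16, 64)

out = F.scaled_dot_product_attention(q, k, v, enable_gqa=True)
print(out.shape)  # torch.Size([1, 8, 16, 64])
```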

---------

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
2025-10-15 13:45:28 +00:00
56a727dde5 Add fast path for bidirectional mask creation to fix regression (#41586)
* fixed performance regression

* also fixed the older_torch function

* Update src/transformers/masking_utils.py

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* fix

* more general

* fix slicing

* fix data dependent

---------

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
2025-10-15 15:30:39 +02:00
dc6fdeb705 Update a dataset repo link (#41618)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-15 14:41:38 +02:00
3953b65440 Reinstate early CUDA init fix (#41617)
* Reinstate early CUDA init fix

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

* Delay import further

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

---------

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-15 14:41:10 +02:00
96d245a83d torch 2.9 don't ❤️ torchcodec 💔 (#41610)
pin

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-15 14:34:00 +02:00
bb0c3af995 More markdown file fixes (#41599)
* Format markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Format markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Format markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-15 12:29:27 +00:00
70e871959c Fix trainer simple tests (#41449)
* fix

* fix ray

* train to tune

* breaking changes wrt generation config

* Fix !

* fix

* fix

* fix deepspeed !

* fix

* fix

* fix

* improve logic

* revert and fix

* revert comment

* oups

* revert change

* fix

* style

* typo in comment

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-15 14:09:00 +02:00
c4210796e0 Import expand_device_map instead of redefining it (#41608)
remove it
2025-10-15 14:00:09 +02:00
fcd1ccdb78 [Docs] Fix changed references (#41614)
* fix

* fix

* other ln
2025-10-15 13:59:13 +02:00
2b2c20f315 Update issue template (#41573)
* update

* fix
2025-10-15 13:54:37 +02:00
e2122c4bcb remove ray_scope and check_quantized_param (#41587)
remove
2025-10-15 13:10:35 +02:00
e89cef6625 fix some case failures caused by "torch.compile recompiled part of th… (#41558)
* fix some case failures caused by "`torch.compile` recompiled part of the forward pass" in xpu

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

* update comment

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

---------

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
2025-10-15 10:45:29 +00:00
26b7f66850 Add logits_to_keep to many older CausalLM models (#41335)
* Add logits_to_keep to CausalLM models

* Skip failing test for git model

* Remove unused return_dict from kosmos2 signature

* Revert BlipForQuestionAnswering
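
Illustrative usage sketch (tiny test checkpoint shown; assumes the loaded model is among those accepting the argument):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
inputs = tok("hello world", return_tensors="pt")

# Only the last position's logits are materialized, shrinking the LM head output.
out = model(**inputs, logits_to_keep=1)
print(out.logits.shape)  # (batch_size, 1, vocab_size)
```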
2025-10-15 11:56:01 +02:00
5db730786d [device_map] Accelerate loading by computing device_map much faster (#41548)
* start

* add the important fix

* continue

* big cleanup

* type hints

* add method

* fix typehints

* typehints

* fix

* oupsi

* remove space

* improve function

* CI
2025-10-15 11:18:57 +02:00
13a35a5057 Enable non-streaming mode in transformers serve (#41446)
* Enable non-streaming in transformers serve

Remove typos

Remove typos

Remove typos

* Fix tests

* Arthur review
2025-10-15 09:37:26 +02:00
94df0e6560 Benchmark overhaul (#41408)
* Big refactor, still classes to move around and script to re-complexify

* Move to streamer, isolate benches, propagate num tokens

* Some refacto

* Added compile mode to name

* Re-order

* Move to dt_tokens

* Better format

* Fix and disable use_cache by default

* Fixed compile and SDPA backend default

* Refactor results format

* Added default compile mode

* Always use cache

* Fixed cache and added flex

* Plan for missing modules

* Experiments: no cg and shuffle

* Disable compile for FA

* Remove wall time, add sweep mode, get git commit

* Review compliance, start

* Apply suggestions from code review

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>

* Update benchmark_v2/framework/benchmark_runner.py

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>

* Disable workflow

* Pretty print

* Added some pretty names to have pretty logs

* Review n2 compliance (end?)

* Style and end of PR

---------

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>
2025-10-14 21:41:43 +02:00
9e4199ede3 Gemma3 fixes (#41572)
* Multiple device error fix

* FA2 equivalence fix

* Move the train fwd in cfg test

* Style

* Added comment

* Made the comment more clear
2025-10-14 18:33:27 +02:00
4c8d293599 Fix typesetting and content of llm_tutorial_optimization.md (#41172)
* Fix typesetting of llm_tutorial_optimization

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix errors

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-14 08:40:26 -07:00
a99b1be3c7 Revert some breaking changes bnb (#41581)
fix
2025-10-14 16:28:16 +02:00
82cae9eb52 Add __iter__ to DynamicCache (#41569)
* Add __iter__ to DynamicCache

* Fix tests that use ddp init
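
Small sketch of iterating the cache; what each yielded item contains is an assumption here, only the iteration itself follows from this change:

```python
import torch
from transformers import DynamicCache

cache = DynamicCache()
cache.update(torch.zeros(1, 2, 5, 4), torch.zeros(1, 2, 5, 4), layer_idx=0)

for layer in cache:  # __iter__ makes the cache unpackable layer by layer
    print(type(layer))
```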
2025-10-14 16:16:32 +02:00
4fad35ee4a [VisionEncoderDecoderModel] Update loss function (#40863)
Update loss function
2025-10-14 16:03:00 +02:00
ae6f6cc3e0 Revert "add rmsnorm kernels support for Intel XPU" (#41579)
Revert "add rmsnorm kernels support for Intel XPU (#41563)"

This reverts commit fd787c5f6d667d3e00def70f588972af4437f631.
2025-10-14 15:49:33 +02:00
fd787c5f6d add rmsnorm kernels support for Intel XPU (#41563)
Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
2025-10-14 13:26:09 +00:00
4e4f2af586 Add conditional checks to _check_and_adjust_attn_implementation() (#41542) 2025-10-14 13:00:07 +00:00
3648fde486 Add DINOv3Backbone for ConvNext variant (#40651)
---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
2025-10-14 14:57:04 +02:00
abf5b57a68 delete some tokenizer tests using pickle (#41514)
* hate pickle

* hate pickle

* hate pickle

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-14 14:50:51 +02:00
8fe4db5399 [kernels] rm mra kernels (#41507)
* fix modeling

* remove kernel

* fix style
2025-10-14 13:34:04 +02:00
c620c38bb0 [Qwen3VLMoe] Fixed: Expected self.dtype to be equal to src.dtype - routing_weights casting (#41420)
* Fixed Expected self.dtype to be equal to src.dtype on eval

* Fixed Expected self.dtype to be equal to src.dtype on eval

* Fixed Expected self.dtype to be equal to src.dtype on eval

* generated modeling_qwen3_vl_moe.py file

* Fixed Ernie_4_5_MoE router casting

* Fixed routing_weights dtype casting (ernie4_5_moe, hunyuan_v1_moe, qwen2_moe, qwen3_moe, qwen3_next,qwen3_omni_moe)

* rollback hunyuan_v1_moe changes

---------

Co-authored-by: Daniel Oliveira <daniel-oliveira-11@hotmail.com>
Co-authored-by: Daniel Oliveira <36623265+daniel3303@users.noreply.github.com>
2025-10-14 13:14:49 +02:00
0798797ec9 Fix an import error with PreTrainModel (#41571) 2025-10-14 13:13:37 +02:00
0566b6f5bd Patch MistralCommonTokenizer (#41439)
* Fix token_to_id and add add_generation_prompt

* Fix spm download

* Refactor spm

* Try another possibly non-gated spm

* Improve get_vocab

* lint

* Improve get_vocab

* Add warn to piece_to_id

* Improve from_pretrained raise and revert model spm

* Revert fast
2025-10-14 11:13:19 +00:00
b3e3c3dc93 [Qwen3VL] fix device mismatch error for FSDP2 training (#41536)
For FSDP2, parameters might be on a meta device, and the weight.device attribute may
not accurately reflect where the actual computation will happen during forward passes.

```log
  File "transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 776, in forward
    pos_embeds = self.fast_pos_embed_interpolate(grid_thw)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 745, in fast_pos_embed_interpolate
    pos_embeds = self.pos_embed(idx_tensor) * weight_tensor[:, :, None]
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "torch/nn/modules/module.py", line 1879, in _call_impl
    return inner()
           ^^^^^^^
  File "torch/nn/modules/module.py", line 1827, in inner
    result = forward_call(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "torch/nn/modules/sparse.py", line 192, in forward
    return F.embedding(
           ^^^^^^^^^^^^
  File "torch/nn/functional.py", line 2546, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected all tensors to be on the same device, but got index is on cpu, different from other tensors on cuda:0 (when checking argument in method wrapper_CUDA__index_select)
```
https://github.com/volcengine/verl/pull/3686#issuecomment-3380981817
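
Self-contained sketch of the device-placement idea (assumption: build index tensors from a runtime tensor's device rather than from a possibly-meta parameter):

```python
import torch
import torch.nn as nn

embed = nn.Embedding(16, 8)
hidden = torch.randn(4, 8)  # stands in for an activation already on the compute device

# Derive the device from a real runtime tensor, not from weight.device,
# which can point at 'meta' under FSDP2.
idx = torch.arange(4, device=hidden.device)
print((embed(idx) * hidden).shape)  # torch.Size([4, 8])
```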

Signed-off-by: Hollow Man <hollowman@opensuse.org>
2025-10-14 10:28:25 +00:00
b84c0b31c6 Remove references to AutoModelForVision2Seq (#41513)
* Since Vision2Seq is deprecated, remove it from pipelines and docstrings

* Catch some more references
2025-10-13 17:00:07 +01:00
1ee3b288a6 [from_pretrained] Small refactor from_pretrained: move around unrelated stuff (#41445)
* drafts

* up

* simplify modeling utils

* more simplifications

* type kwargs

* up

* move more accelerate related stuff

* safeguarding?

* nits

* remove func when func is NOPE

* more

* nits

* styling

* yups

* up

* ups

* revert

* protect trainer utils import

* fix doc

* Update src/transformers/integrations/peft.py

Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>

* review

* update

* ?

* fixx

* update

* super small update

* ups

* style

* this is stupid

* 🤦 well this was the issue

* small nit

* fix

* nit

* damn the missing return

* one last stupid fix

---------

Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
2025-10-13 16:33:32 +02:00
cad74496ca [model] Add VideoLLaMA3 implementation (#40499)
* Add VideoLLaMA3 implementation

* Run style fix

* Switch to modular

* Fix config and smart_resize

* Fix

* Fix

* Fix style

* Fix

* Ruff fix

* Rename

* Rename

* Fix

* Clean

* Fix consistency

* Add doc

* Fix

* Fix

* Fix doc

* Update generated code

* remove test_initialization

* fix tests

* simplify

* tests

* Add VideoLlama3IntegrationTest

* replace asserts

* fix tests

---------

Co-authored-by: steven-ccq <55176896+steven-ccq@users.noreply.github.com>
Co-authored-by: steven-ccq <1456320989@qq.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-13 15:54:34 +02:00
3813a8e3a1 Add VideoMAE video processor (#41534)
* Add video processor for VideoMAE

* Document VideoMAE video processor

* Add regression tests for VideoMAE video processor

* refactor: Use direct batch key access for pixel_values_videos

* test: add parity test for VideoMAEVideoProcessor vs VideoMAEImageProcessor

* docs(videomae): update model docstring example to demonstrate VideoMAEVideoProcessor (TorchCodec-based decoding and sampling)
2025-10-13 15:42:27 +02:00
66d8d7a077 Fixed typos and formatting (#34215)
#hacktoberfest
2025-10-13 13:38:06 +00:00
d621be8286 🚨 [v5] generate delegates default cache initialization to the model (#41505) 2025-10-13 13:20:48 +01:00
d7c9fbdb64 Enable modular files from other libraries (#41372)
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-13 13:48:32 +02:00
41e763decd Add AMD developer cloud support (#41126)
* Add AMD developer cloud support

* Add AMD remote svg link.

* Update notebooks/README.md

Co-authored-by: pagezyhf <165770107+pagezyhf@users.noreply.github.com>

---------

Co-authored-by: Rémi Ouazan <83456801+remi-or@users.noreply.github.com>
Co-authored-by: pagezyhf <165770107+pagezyhf@users.noreply.github.com>
2025-10-13 12:17:24 +02:00
cf1e9834ec Restore cuda graphs to continuous batching (#41421)
* Type hints and small fixes

* Remove unusued params

* Made slice inputs the default

* ruffed

* Updated some var name and moved index slicing

* Logging arg in example

* Added some padding debug var and reformat out cg

* First working CG, fixe size

* Working flexible CG

* CG are compatible with all implementations

* Fixed CG API

* Update example

* Documentation

* Fix padding tokens in FA

* Review compliance

* Better doc around weird bug

* Style

* Fix for sliding with CG
2025-10-13 11:57:56 +02:00
6c901bdc0e [SAM] Fix typing hints (#41506)
fix
2025-10-13 11:52:00 +02:00
58f9e13313 Fixed type hints in function definitions (#41525)
* Explicitly annotate default None parameters as Optional

* make style.

* make style.

* Fixed check_copies.

* fix consistency.
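
Minimal example of the annotation convention applied here:

```python
from typing import Optional

# A parameter defaulting to None is annotated explicitly as Optional.
def load_checkpoint(path: Optional[str] = None) -> None:
    ...
```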
2025-10-13 11:48:37 +02:00
eb28242251 Add MLlama fast image processor (#41391)
* Merge conflict

* add fast processor

* add fast processor

* make style

* add new convert rgb

* use nested group by shape in mllama fast, add support for multiple inputs in group by shape

* refactor after review

---------

Co-authored-by: Vincent <phamvinh257@gmail.com>
2025-10-13 09:16:05 +00:00
65cb8fac6d [Qwen3VL] fix: hidden_states in place modification error (#41535)
```
  File "transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 941, in forward
    hidden_states = self._deepstack_process(
                    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py", line 960, in _deepstack_process
    hidden_states[visual_pos_masks, :] = local_this
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Output 0 of SliceBackward0 is a view and is being modified inplace. This view was created inside a custom Function (or because an input was returned as-is) and the autograd logic to handle view+inplace would override the custom backward associated with the custom Function, leading to incorrect gradients. This behavior is forbidden. You can fix this by cloning the output of the custom Function.
```
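
Self-contained sketch of the clone-before-masked-write pattern the error message suggests (toy tensors, not the model's real shapes):

```python
import torch

hidden_states = torch.randn(4, 8)
visual_pos_masks = torch.tensor([True, False, True, False])
local_this = torch.randn(2, 8)

hidden_states = hidden_states.clone()            # break the view before mutating
hidden_states[visual_pos_masks, :] = local_this  # masked in-place write is now safe
```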

Signed-off-by: Hollow Man <hollowman@opensuse.org>
2025-10-13 10:50:14 +02:00
3927ffed31 [testing] reduce runtime of HunYuanMoEV1IntegrationTest:test_model_generation (#41373)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-10 22:27:01 +02:00
7164924a7e Fix LaTeX typesetting in documentation (#41177)
Fix LaTeX typesetting in documentation

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-10 08:54:27 -07:00
26a5368c44 Allow optuna's catch kwargs passthrough (#41496)
* allow optuna's catch kwargs passthrough

* apply ruff formatting

---------

Co-authored-by: nicha <nicha.api@nectec.or.th>
2025-10-10 13:58:07 +00:00
feca4f3de7 remove tpu_num_cores (#41383)
* remove-tpu-num-cores

* fix

* let's remove it

* style

* Update examples/legacy/seq2seq/finetune_tpu.sh

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

---------

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-10-10 15:53:28 +02:00
c6042a4169 Remove outdated flags (#41512)
remove flags
2025-10-10 14:34:47 +02:00
dfd4121cd4 add Trainer import to .md in appropriate cell block for training.ipynb transformers_doc (#41484)
add Trainer import to .md in appropriate cell block for docs
2025-10-10 12:04:07 +00:00
60f6ec438a Fix detectron2 import (#41510)
* fix

* fix

* typo
2025-10-10 13:33:47 +02:00
f9f8bf5a10 Revert local_rank deletion and some cleaning (#41504)
* forgot those

* clean

* Fix

* merge

* fix

* fix
2025-10-10 12:23:04 +02:00
b4067472ae Bump to hfh 1.0.0.rc5 to fix test (#41508) 2025-10-10 12:12:08 +02:00
bc529a3368 More trainer cleaning (#41489)
clean
2025-10-10 11:55:43 +02:00
b92fc0c6e1 [QoL] modular conversion shows LoC saved (#41500)
smol qol conversion
2025-10-10 11:55:23 +02:00
2eae7c7452 Set truncation to False in Qwen3Omni to avoid default truncation (#41473)
* Set `truncation` to `False` in Qwen3Omni to avoid default truncation

* move `padding` and `truncation` to audio default args

---------

Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>
2025-10-10 09:55:18 +00:00
c5094a4f97 [voxtral] language detection + skipping lang:xx (#41225)
* proc + doc update

* improve doc

* add lang:xx in decode

* update voxtral test

* nit

* nit

* update test value

* use regex
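
Rough sketch of stripping the language marker with a regex during decoding; the exact token format is an assumption:

```python
import re

decoded = "lang:en The quick brown fox"
clean = re.sub(r"^lang:[a-z]{2}\s*", "", decoded)
print(clean)  # "The quick brown fox"
```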
2025-10-10 09:18:30 +00:00
f4487ec521 fix gemma3n case failure (#41426)
* fix gemma3n case failure

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* Update dependency_versions_table.py

* change the case argument passing way to make the case PASS,
generation_config way need re-visit

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-10-10 09:15:27 +00:00
e8194fe84f Fix some tests (#41503)
* fix

* fix

* doc
2025-10-10 11:05:09 +02:00
9556b36b2f [causallm tester] automate pipeline mappings + bloom tests (#41318) 2025-10-10 10:02:00 +01:00
5aca530b34 [Parakeet] unnecessary warning & auto mapping (#41412)
* add parakeet to CONFIG_MAPPING_NAMES

* TOKENIZER_MAPPING_NAMES update

* fix auto tokenizer

* update

* fix
2025-10-10 11:00:15 +02:00
4f323369db Fixed tiny incorrect imports in glm4v (#41483)
Fixed tiny import issue in glm4v
2025-10-10 08:57:01 +00:00
f5f3457278 Try to remove pickle - BloomTokenizerFast (#41466)
* pickle 1

* pickle 1

* pickle 1

* pickle 1

* pickle 1

* pickle 1

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-10 10:52:51 +02:00
3585737746 [kernels] rm yoso kernel (#41495)
* disable kernel mapping

* rm kernel

* delete files

* style

* typo
2025-10-10 10:50:12 +02:00
b543679d0e [kernels] Remove RWKV kernel finally ! (#41493)
* rm kernel

* fix style
2025-10-10 10:32:05 +02:00
ac7777be16 fix bnb model loading (#41499) 2025-10-10 08:27:29 +00:00
17c31a98ac Streaming should be handled at the request level rather than at the instance level (#41444)
* Streaming should be handled at the request-level rather than at the instance level

* Add tests

* Require torch GPU
2025-10-10 10:24:55 +02:00
b28902c86b Remove DISABLE_KERNEL_MAPPING flag (#41475)
rm disable
2025-10-10 10:19:25 +02:00
d0271be18f Update philosophy (#41438)
* update philosophy

* Update docs/source/en/philosophy.md

Co-authored-by: Sergio Paniego Blanco <sergiopaniegoblanco@gmail.com>

* Apply suggestions from code review

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/philosophy.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* emphasis

---------

Co-authored-by: Sergio Paniego Blanco <sergiopaniegoblanco@gmail.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-10-10 06:52:18 +00:00
0419ff881d Remove local_rank arg from TrainingArguments (#41382) 2025-10-09 18:54:12 +02:00
081391b20e deprecate jit_mode_eval (#41376) 2025-10-09 18:50:45 +02:00
1ddbbdef48 [Trainer] deprecate ray scope (#41403) 2025-10-09 18:50:00 +02:00
c20849bad1 [CI] Fix copies on main (#41486)
fix copies
2025-10-09 18:38:14 +02:00
776eea8612 deprecate overwrite_output_dir (#41323)
* dep

* style

* rm

* wut

* style
2025-10-09 18:36:19 +02:00
3839d51013 report_to default changed to "none" + cleaning deprecated env var (#41375)
* reporting

* fix

* fix
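
Minimal sketch of opting back into a reporting integration now that the default is "none":

```python
from transformers import TrainingArguments

# Integrations must now be requested explicitly.
args = TrainingArguments(output_dir="out", report_to=["wandb"])
```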
2025-10-09 18:28:48 +02:00
78f79ba5af Update GLM-4.6 doc (#41471)
Update glm4_moe.md
2025-10-09 09:18:05 -07:00
11c597b1b8 Remove deprecated args in Trainer for v5 (#41404)
remove deprecated code
2025-10-09 18:10:14 +02:00
b450d55a91 Remove past_index (#41384)
* remove-tpu-num-cores

* fix

* rm past index

* Revert "fix"

This reverts commit 7608a6c059210957d3a77812e66178c8b79a9313.

* Revert "remove-tpu-num-cores"

This reverts commit ef08a51d71389849851518d67d8ad6c9ea8f04fc.
2025-10-09 18:06:46 +02:00
1a3a5f5289 Remove SigOpt (#41479)
* remove sigopt

* style
2025-10-09 18:05:55 +02:00
823fab4860 Fix bnb fsdp loading for pre-quantized checkpoint (#41415)
* fix

* fix

* get_param_name

* fix device name
2025-10-09 18:05:35 +02:00
42d4e13a0b RT-Detr correct 2d positional embeddings for non-square images (#41380)
* Correct 2d positional embeddings for non-square images

* Simplify bug fix propagate changes to other models

---------

Co-authored-by: Konstantinos Pitas <kostasp210@gmail.com>
Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
2025-10-09 17:58:22 +02:00
0eae41ad36 Add Code World Model (CWM) (#41199)
* [wip][cwm] Code World Model stubs and setup in HF Transformers

* [wip] Get other things working

* [wip] Working

* Tokenizer pad

* fix: cwm window attn

* temp remove test

* temp remove test

* Fixes

* Temporarily add auto config remapping option until VLLM 0.11 is out

* Fix model type and add layer validation

* Lint, remove CwmForSequenceClassification

* Lint, tests

* Remove CwmForSequenceClassification

* Lint

* Remove intermediary layer exports/doc errors, fix tests

* Lint

* run python utils/sort_auto_mappings.py --check_only

* Remove Cwm processor mapping, get check_repo passing

* Remove CwmTextConfig from test

* Add docstring for CwmConfig

* remove global_window and window_pattern params from config

* Fix docstrings

* Revert change to auto docstring util

* lint

* Fixes minus test improvements

* Alter tests to simply check logits

* lint

* Have slow tests use repo, make CwmPretrainedModel passthrough

* Remove decoder layer implementation, use Llama3Decoder + CwmAttetion

* Use linear w/o bias for CwmAttention, add token-level integration test

* Don't ignore config attention bias

* Remove attention bias parameter entirely from config

---------

Co-authored-by: galco <galco@meta.com>
2025-10-09 17:57:45 +02:00
589fc29c9d enhance patched_tearDown to support python 3.11+ (#41429)
* enhance to support python 3.11+

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-10-09 21:19:29 +05:30
26b5b52676 [Fix] Fix test file error (#40973)
Fix test file error
2025-10-09 15:30:53 +00:00
34b861abd1 🚨 [Attention Masks] Bidirectional masks for encoder and encoder-decoder models (#41265)
* new masks

* fixes

* adjust comments

* fix unnecessary mask creation on sdpa

* simplify masks more

* propogate to other models

* style + repo consistency

* copies

* no comment

* fix attempt

* finally fix grounding dinos

* fix distilbert

* fix executorch

* move to own module

* address first few comments WIP

* revert device comments, simplify executorch further

* fix typo

* add a test for cuda graphs

* move cleanup...

* fix conflict with new main

* fix esm and evolla
2025-10-09 16:56:11 +02:00
b44d91570f [v5] remove load_in_4bit and load_in_8bit (#41287)
* [v5] remove load_in_4bit and load_in_8bit

* fix

* revert

* fix
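
Hedged sketch of the replacement path via an explicit quantization config (checkpoint is illustrative; needs bitsandbytes and a GPU):

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B",  # illustrative checkpoint
    quantization_config=bnb_config,
)
```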

---------

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-10-09 16:34:04 +02:00
d99069195b Cleaning hub kernels (#41477)
* disable kernel mapping

* cleaning

* revert

* fix style
2025-10-09 16:32:18 +02:00
bf38b2d11d Change RT-Detr docs to reflect fixed 640x640 input size (#41364)
* Update rt_detr docs to mention 640x640 input size

The authors of RT-Detr mention that the model was trained on 640x640 images and was meant to be used for inference on 640x640 images.
Also, the current implementation has certain quirks that make training/inferring on images of different sizes problematic. For example,
the pixel masks used for batches of varying image sizes are discarded. I've added a few lines in the docs to notify the user about these issues.

* Batching not possible with variable image sizes

* Remove reference to batching
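
Minimal preprocessing sketch at the fixed 640x640 resolution (processor class and size argument as I understand them; treat as an assumption):

```python
import numpy as np
from PIL import Image
from transformers import RTDetrImageProcessor

processor = RTDetrImageProcessor(size={"height": 640, "width": 640})
image = Image.fromarray(np.zeros((480, 720, 3), dtype=np.uint8))
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # expected torch.Size([1, 3, 640, 640])
```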

---------

Co-authored-by: Konstantinos Pitas <kostasp210@gmail.com>
2025-10-09 14:29:16 +00:00
72a3fc275c Remove infer_device (#41088)
* Remove infer_device

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix docs using accelerator

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix conflict

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-09 14:05:39 +00:00
9ef804472b Pickle - part 2 (#41476)
* pickle 2

* pickle 2

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-09 13:46:53 +00:00
2b5e4c0d13 Import Callable from collections.abc (#41130)
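Minimal example of the import convention this change applies:

```python
from collections.abc import Callable

def apply_twice(fn: Callable[[int], int], x: int) -> int:
    return fn(fn(x))

print(apply_twice(lambda v: v + 1, 0))  # 2
```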
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-09 12:12:43 +00:00
add4df62ba Fix tests fsdp (#41422)
* Fix tests

* fix !

* fix
2025-10-09 14:09:52 +02:00
3e87072666 Fix auto model configuration for encoder of perceptionlm (#41464)
* fix auto model configuration for encoder of perceptionlm

* delete perception_encoder auto registrations
2025-10-09 14:08:03 +02:00
f0544d7e7c Remove KERAS_NLP_IMPORT_ERROR (#41468)
Remove unused variables of error messages

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-09 11:58:30 +00:00
d1c6310d6a 🚨 [v5] Redundant code in nested configs (#41314)
* batch update models

* delete even more

* fix modular super init location

* fix

* fix copies

* fix again, these have force-set values in configs

* fix copies
2025-10-09 13:47:44 +02:00
927aa8bef2 [kernels] Cleanup deta kernel (#41470)
* cleanup deta kernel

* fix modeling
2025-10-09 13:17:42 +02:00
1951f3be8e Update GLM-4.1V MMRope implementation (#41182)
* update for 4D mask

* update

* Update modular_glm4v.py

* 1

* Revert "1"

This reverts commit d13a763e876fa049c5fb70a8b3447b335dbb6098.

* update as glm4v logic

* update

* 1

* update

* Create convert_glm4v_moe_mgt_weights_to_hf.py

* update

* update
2025-10-09 12:15:47 +02:00
f50fd7fb6b [v5] rm utils/tf_ops/ (#41402)
rm utils/tf_ops/
2025-10-09 10:27:47 +01:00
be3fa93b29 Subconfig is a class attribute (#41308)
* delete

* fix this test

* fix copies

* oke, more tests to fix

* fix last tests on DPT

* deleted accidentally
2025-10-09 10:46:44 +02:00
8137dbdbbd 🚨 [v5] Rename left traces of past_key_value in BERT-like models (#41448)
rename everything
2025-10-09 10:44:44 +02:00
7aa888b7fa Fix doc (#41457)
* dummy

* remove
2025-10-08 20:13:21 +02:00
bfe2b623ef Fix generate outputs and simplify cache tests (#41440)
* start refactoring

* simplify

* tests

* tests

* fix

* zamba

* final fix

* fix
2025-10-08 19:04:18 +02:00
b9be8a8775 enable some falcon-mamba uts on xpu (#41428)
* enable some falcon-mamba uts on xpu

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-10-08 18:48:04 +02:00
bef73bf8d7 Update hqq.md (#41452)
mistake in loading model
2025-10-08 07:44:56 -07:00
89a4115a6b Validate processing kwargs with @strict from huggingface_hub (#40793)
* initial design draft

* delete

* fix a few tests

* fix

* fix the rest of tests

* common-kwargs

* why the runner complains about typing with "|"?

* revert

* forgot to delete

* update

* fix last issues

* add more details in docs

* pin the latest hub release

* fix tests for new models

* also fast image processor

* fix copies

* image processing ast validated

* fix more tests

* typo and fix copies

* bump

* style

* fix some tests

* fix copies

* pin rc4 and mark all TypedDict as non-total

* delete typed dict adaptor

* address comments

* delete optionals
2025-10-08 16:14:09 +02:00
82ffeb28ad Add Top-H decoding (entropy-bounded truncation) as a LogitsWarper for text generation (#40837)
* init

* added TopH

* Update TopH logits_process.py

* Update logits_process.py

* Update test_logits_process.py

* Update test_logits_process.py

* added test No. 4

* Resolving __init__.py issues

* Resolving configuration_utils.py Issues

* Resolving logits_process.py Issues

* Resolving utils.py Issues

* Resolving test_logits_process.py Issues

* Resolving __init__.py issues

* Resolving logits_process.py Issues

* Resolving __init__.py issues

* Updated Docs

* Updated Docstring

* style: autoformat with make fixup

* Fixing Docstring

* Update logits_process.py removed defaults

* Variable H name -> cumulative_entropy

* Using torch.distributions.Categorical

* Improve torch_dtype checks (#40808)

* Improve torch_dtype checks

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Apply suggestions from code review

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Add VideoProcessors to auto-backend requirements (#40843)

* add it

* fix existing ones

* add perception to auto_mapping...

* Adds Causal Conv 1D kernel for mamba models (#40765)

* add kernel

* make style

* keep causal-conv1d

* small fix

* small fix

* fix modular converter

* modular fix + lazy loading

* revert changes modular

* nit

* hub kernels update

* update

* small nit

* Update no split modules in T5Gemma model (#40810)

* Update no split modules in T5Gemma model

* Update no_split_modules also for T5Gemma modular

* Remove model_split_percents from test cases

---------

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Replace image classification loss functions to `self.loss_function` (#40764)

* Fix the misalignment between the l2norm in GDN of Qwen3-Next and the implementation in the FLA library. (#40842)

* align torch implementation of gdn with fla.

* fix fla import.

* fix

* remove unused attr

* fixes

* strictly align l2norm in Qwen3-Next with FLA implementation.

---------

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>

* Fixes for continuous batching (#40828)

* Fix for CB attn mask and refactor

* Tests for CB (not all passing)

* Passing tests and a logger fix

* Fixed the KV metrics that were broken when we moved to hybrid alloc

* Fix circular import and style

* Added tests for FA

* Unfolded test to have device expectations

* Fixes for H100

* more fixes for h100

* H100 are good

* Style

* Adding some comments from #40831

* Rename test

* Avoid 1 letter variables

* Dictionary is only removed during kwargs

* Test for supported sample

* Fix an involuntary slice

* Fixes for non-sliced inputs and small example improvements

* Slice inputs is more understandable

* Style

* [tests] re-enable aria fast tests (#40846)

* rise from the dead

* test

* [SAM2] Fix inconsistent results with original implementation with input boxes (#40800)

* Fix inconsistencies with box input inference with original repo

* remove print

* always pad

* fix modular

* [Sam2Video] Fix video inference with batched boxes and add test (#40797)

fix video inference with batched boxes and add test

* add: differential privacy research model (#40851)

* VaultGemma

* Removing Sequence and Token classification models. Removing integration tests for now

* Remove pass-only modular code. style fixes

* Update vaultgemma.md

* Update docs/source/en/model_doc/vaultgemma.md

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Update docs/source/en/model_doc/vaultgemma.md

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Add links to model doc

* Correct model doc usage examples

* Updating model doc to describe differences from Gemma 2

* Update model_doc links

* Adding integration tests

* style fixes

* repo consistency

* attribute exception

---------

Co-authored-by: Amer <amersinha@gmail.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* [test] Fix test_eager_matches_sdpa incorrectly skipped (#40852)

* output_attentions in typed kwargs

* correct typing in GenericForTokenClassification

* improve

* [tests] move generative tests away from `test_modeling_common.py` (#40854)

move tests

* [generate] Always use decoder config to init cache (#40772)

* mega derp

* fix

* always use the decoder

* Use checkpoint in auto_class_docstring (#40844)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix TrainingArguments.parallelism_config NameError with accelerate<1.10.1 (#40818)

Fix ParallelismConfig type for accelerate < 1.10.1

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* Redirect MI355 CI results to dummy dataset (#40862)

* [Bug fix #40813] Fix base_model_tp_plan of Starcoder2 model. (#40814)

Signed-off-by: greg-kwasniewski1 <213329731+greg-kwasniewski1@users.noreply.github.com>

* [docstrings / type hints] Update outdated annotations for `past_key_values`  (#40803)

* some fixes

* nits

* indentation

* indentation

* a bunch of type hints

* bulk changes

* fix florence kwargs  (#40826)

* fix: XIELU act parameters not being casted to correct dtype (#40812)

* Update model tags and integration references in bug report (#40881)

* [Qwen3 Next] Use numerically stable `rsqrt` (#40848)

use numerically stable inverse

* Adding Support for Qwen3-VL Series (#40795)

* add qwen3vl series

* make fixup

* fix import

* re-protect import

* fix it finally (need to merge main into the branch)

* skip processor test (need the checkpoint)

* oups typo

* simplify modular

* remove unnecessary attr

* fix layer

* remove unused rope_deltas args

* reuse image def

* remove unnecessary imports

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>

* [`VaultGemma`] Update expectations in integration tests (#40855)

* fix tests

* style

* Fix modular consistency (#40883)

* reapply modular

* add missing one

* 🔴 Move variable output controls to `_prepare_generation_config ` (#40715)

* move checks to validate steps where possible

* fix csm and other models that override _sample

* ops dia you again

* opsie

* joao review

* Move variable output controls to `prepare_inputs_for_generation`

* fix a bunch of models

* back to basics

* final touches

* Clarify passing is_causal in sdpa_attention_paged_forward (#40838)

* Correctly pass is_causal in sdpa_attention_paged_forward

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add comment

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve comments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use torch.expm1 and torch.log1p for better numerical results (#40860)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add Fast PromptDepthAnything Processor (#40602)

* Test & import setup

* First version passing tests

* Ruff

* Dummy post processing

* Add numerical test

* Adjust

* Doc

* Ruff

* remove unused arg

* Refine interpolation method and push test script

* update bench

* Comments

* Update src/transformers/models/auto/image_processing_auto.py

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>

* Remove benchmark script

* Update docstrings

* Update src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>

* Update src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>

* doc

* further process kwargs

* remove it

* remove

* Remove to dict

* remove crop middle

* Remove param specific handling

* Update testing logic

* remove ensure multiple of as kwargs

* fix formatting

* Remove none default and get image size

* Move stuff to _preprocess_image_like_inputs and refacto

* Clean

* ruff

* End of file & comments

* ruff again

* Padding fixed

* Remove comments to pass tests

* Remove prompt depth from kwargs

* Adjust output_size logic

* Docstring for preprocess

* auto_docstring for preprocess

* pass as an arg

* update test batched

* stack images

* remove prompt scale to meter

* return tensors back in preprocess

* remove copying of images

* Update behavior to match old processoer

* Fix batch size of tests

* fix test and fast

* Fix slow processor

* Put tests back to pytorch

* remove check and modify batched tests

* test do_pad + slow processor fix

---------

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co>

* Fix deta loading & dataclass (#40878)

* fix

* fix 2

* Remove dict branch of attention_mask in sdpa_attention_paged_forward (#40882)

Remove dict branch of attention_mask

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* 🌐 [i18n-KO] Translated smolvlm.md to Korean (#40414)

* fix: manual edits

* Apply suggestions from code review

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/_toctree.yml

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* 🌐 [i18n-KO] Translated `imageprocessor.md` to Korean (#39557)

* feat: manual translation

* docs: fix ko/_toctree.yml

* Apply suggestions from code review

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>
Co-authored-by: Yijun Lee <119404328+yijun-lee@users.noreply.github.com>

* Update docs/source/ko/image_processors.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>
Co-authored-by: Yijun Lee <119404328+yijun-lee@users.noreply.github.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* [generate] remove docs of a feature that no longer exists (#40895)

* Make debugging failing tests (check and update expect output values) easier 🔥  (#40727)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fixing the call to kernelize (#40628)

* fix

* style

* overload train and eval

* add getter and setter

* Fix getter  regression (#40824)

* test things

* style

* move tests to a sane place

* Fix flaky `Gemma3nAudioFeatureExtractionTest::test_dither` (#40902)

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* [cache] Merge static sliding and static chunked layer (#40893)

* merge

* get rid of tensors in get_mask_sizes!!

* remove branch

* add comment explanation

* re-add the class with deprecation cycle

* Harmonize CacheLayer names (#40892)

* unify naming

* style

* doc as well

* post rebase fix

* style

* style

* revert

* [cache] Only use scalars in `get_mask_sizes` (#40907)

* remove tensor ops

* style

* style

* Set seed for `Glm4vIntegrationTest` (#40905)

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Add Olmo3 model (#40778)

* transformers add-new-model-like for Olmo3

* Implement modular Olmo3

* Update Olmo3 tests

* Copy Olmo2 weight converter to Olmo3

* Implement Olmo3 weight converter

* Fix code quality errors

* Remove unused import

* Address rope-related PR comments

* Update Olmo3 model doc with minimal details

* Fix Olmo3 rope test failure

* Fix 7B integration test

* remove dummy EncodingFast (#40864)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve module name handling for local custom code (#40809)

* Improve module name handling for local custom code

* Use `%lazy` in logging messages

* Revert "Use `%lazy` in logging messages"

This reverts commit 5848755d5805e67177c5218f351c0ac852df9340.

* Add notes for sanitization rule in docstring

* Remove too many underscores

* Update src/transformers/dynamic_module_utils.py

* Update src/transformers/dynamic_module_utils.py

---------

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>

* Remove `runner_map` (#40880)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* disable `test_fast_is_faster_than_slow` (#40909)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* [gemma3] `Gemma3ForConditionalGeneration` compatible with assisted generation (#40791)

* gemma3vision compatible with assisted generation

* docstring

* BC

* docstring

* failing checks

* make fixup

* apply changes to modular

* misc fixes

* is_initialized

* fix poor rebase

* [generate] misc fixes (#40906)

misc fixes

* 🔴Make `center_crop` fast equivalent to slow (#40856)

make center_crop fast equivalent to slow

* Fix dtype in Paligemma (#40912)

* fix dtypes

* fix copies

* delete unused attr

* [Docs] Adding documentation of MXFP4 Quantization (#40885)

* adding mxfp4 quantization docs

* review suggestions

* Apply suggestions from code review

Co-authored-by: vb <vaibhavs10@gmail.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: vb <vaibhavs10@gmail.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Processor load with multi-processing (#40786)

push

* [Llama4] Remove `image_sizes` arg and deprecate `vision_feature_layer` (#40832)

* Remove unused arg

* deprecate

* revert one change

* get set go

* version correction

* fix

* make style

* comment

* Fix #40067: Add dedicated UMT5 support to GGUF loader (config, tokenizer, test) (#40218)

* Fix #40067 : add UMT5 support in GGUF loader (config, tokenizer, test)

* chore: fix code formatting and linting issues

* refactor: move UMT5 GGUF test to quantization directory and clean up comments

* chore: trigger CI pipeline

* refactor(tests): Move UMT5 Encoder GGUF test to GgufModelTests. This consolidates the new test into the main class for consistency.

* Add regression check to UMT5 encoder GGUF test

Verify encoder output against reference tensor values with appropriate tolerances for stability.

* Update tests/quantization/ggml/test_ggml.py

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* Update tests/quantization/ggml/test_ggml.py

remove comments

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

---------

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* [torchao safetensors] renaming get_state_dict function (#40774)

renaming get_state_dict function

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* Adding activation kernels (#40890)

* first commit

* add mode

* revert modeling

* add compile

* rm print

* Minor fix for #40727 (#40929)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Add support for Florence-2 training (#40914)

* Support training florence2

* update doc and testing model to florence-community

* fix florence-2 test, use head dim 16 instead of 8 for fa2

* skip test_sdpa_can_dispatch_on_flash

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

* Add LongCat-Flash (#40730)

* working draft for LongCat

* BC changes to deepseek_v3 for modular

* format

* various modularities

* better tp plan

* better init

* minor changes

* make modular better

* clean up patterns

* Revert a couple of modular commits, because we won't convert in the end

* make things explicit.

* draft test

* toctree, tests and imports

* drop

* woops

* make better things

* update test

* update

* fixes

* style and CI

* convert stuff

* up

* ah, yes, that

* enable gen tests

* fix cache shape in test (sum of 2 things)

* fix tests

* comments

* re-Identitise

* minimize changes

* better defaults

* modular betterment

* fix configuration, add documentation

* fix init

* add integration tests

* add info

* simplify

* update slow tests

* fix

* style

* some additional long tests

* cpu-only long test

* fix last tests?

* urg

* cleaner tests why not

* fix

* improve slow tests, no skip

* style

* don't upcast

* one skip

* finally fix parallelism

* [DOC] Add missing dates in model cards (#40922)

add missing dates

* [models] remove unused `import torch.utils.checkpoint`  (#40934)

* Intel CPU dockerfile (#40806)

* upload intel cpu dockerfile

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update cpu dockerfile

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update label name

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* docs(i18n): Correct the descriptive text in the README_zh-hans.md (#40941)

* Fix trainer tests (#40823)

* fix liger

* fix

* more

* fix

* fix hp

* fix

---------

Co-authored-by: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>

* Fix `Glm4vMoeIntegrationTest` (#40930)

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Raise error instead of warning when using meta device in from_pretrained (#40942)

* raise instead of warning

* add timm

* remove

* Consistent naming for images kwargs (#40834)

* use consistent naming for padding

* no validation on pad size

* add warnings

* fix

* fix copies

* another fix

* fix some tests

* fix more tests

* fix last tests

* fix copies

* better docstring

* delete print

* Remove nested import logic for torchvision (#40940)

* remove nested import logic for torchvision

* remove unnecessary protected imports

* remove unnecessary protected import in modular (and modeling)

* fix wrongly removed protected imports

* Fix `Glm4vModelTest::test_eager_matches_fa2_generate` (#40947)

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Update expected values for some `test_speculative_generation` (#40949)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Standardize audio embedding function name for audio multimodal models (#40919)

* Standardize audio embedding function name for audio multimodal models

* PR review

* Add FlexOlmo model (#40921)

* transformers add-new-model-like

* Add FlexOlmo implementation

* Update FlexOlmo docs

* Set default tokenization for flex olmo

* Update FlexOlmo tests

* Update attention comment

* Remove unneeded use of `sliding_window`

* Don't list dropout in eager_paged_attention_forward (#40924)

Remove dropout argument

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update expected values for one more `test_speculative_generation` after #40949 (#40967)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* FIX(trainer): ensure final checkpoint is saved when resuming training (#40347)

* fix(trainer): ensure final checkpoint is saved when resuming training

* add test

* make style && slight fix of test

* make style again

* move test code to test_trainer

* remove outdated test file

* Apply style fixes

---------

Co-authored-by: rangehow <rangehow@foxmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* Add new model LFM2-VL (#40624)

* Add LFM2-VL support

* add tests

* linting, formatting, misc review changes

* add siglip2 to auto config and instantiate it in lfm2-vl configuration

* decouple image processor from processor

* remove torch import from configuration

* replace | with Optional

* remove layer truncation from modeling file

* fix copies

* update everything

* fix test case to use tiny model

* update the test cases

* fix finally the image processor and add slow tests

* fixup

* typo in docs

* fix tests

* the doc name uses underscore

* address comments from Yoni

* delete tests and unsuffling

* relative import

* do we really handle imports better now?

* fix test

* slow tests

* found a bug in ordering + slow tests

* fix copies

* dont run compile test

---------

Co-authored-by: Anna <anna@liquid.ai>
Co-authored-by: Anna Banaszak <48625325+ankke@users.noreply.github.com>

* Fix outdated version checks of accelerator (#40969)

* Fix outdated version checks of accelerator

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix outdated version checks of accelerator

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use `skip_predictor=True` in vjepa2 `get_vision_features` (#40966)

use skip_predictor in vjepa2 `get_vision_features`

* [Trainer] Fix DP loss (#40799)

* fix

* style

* Fix fp16

* style

---------

Co-authored-by: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>

* [timm_wrapper] better handling of "Unknown model" exception in timm (#40951)

* fix(timm): Add exception handling for unknown Gemma3n model

* nit: Let’s cater to this specific issue

* nit: Simplify error handling

* Fix Issue #39030: AutoTokenizer.from_pretrained does not propagate token (#40956)

* fix merge conflicts

* change token typing

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-27-253.ec2.internal>

* [tests] Really use small models in all fast tests (#40945)

* start

* xcodec

* chameleon

* start

* layoutlm2

* layoutlm

* remove skip

* oups

* timm_wrapper

* add default

* doc

* consistency

* Add captured actual outputs to CI artifacts (#40965)

* fix

* fix

* Remove `# TODO: ???` as it makes me `???`

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Revert change in `compile_friendly_resize` (#40645)

fix

* Track the CI (model) jobs that don't produce test output files (process being killed etc.) (#40981)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Using torch.distributions.Categorical

* Remove `set_model_tester_for_less_flaky_tests` (#40982)

remove

* Benchmarking v2 GH workflows (#40716)

* WIP benchmark v2 workflow

* Container was missing

* Change to sandbox branch name

* Wrong place for image name

* Variable declarations

* Remove references to file logging

* Remove unnecessary step

* Fix deps install

* Syntax

* Add workdir

* Add upload feature

* typo

* No need for hf_transfer

* Pass in runner

* Runner config

* Runner config

* Runner config

* Runner config

* Runner config

* mi325 caller

* Name workflow runs properly

* Copy-paste error

* Add final repo IDs and schedule

* Review comments

* Remove wf params

* Remove parametrization from worfkflow files

* Fix callers

* Change push trigger to pull_request + label

* Add back schedule event

* Push to the same dataset

* Simplify parameter description

* 🔴[`Attention`] Bert-based Models Attention Refactor (#38301)

* clean start to bert refactor

* some test fixes

* style

* fix last tests

* be strict on positional embeddings, fix up tests accordingly

* cache support

* more cache fixes, new causal API

* simplify masks, fix tests for gen

* flex attn, static cache support, round of fixes

* ?

* this time

* style

* fix flash attention tests, flex attention requires torch 2.7.x to work with multiple classes (as recompile strats force a size call which is wrongly interpreted before)

* roberta

* fixup sdpa remains

* attention split, simplify args and kwargs, better typing

* fix encoder decoder

* fix test

* modular roberta

* albert

* data2vectext, making it modular tomorrow

* modular data2vec text

* tmp disable

* xmod + cache position fixes

* whoops

* electra + markuplm, small fixes

* remove wrong copy

* xlm_roberta + some embedding fixes

* roberta prelayernorm

* RemBert: remove copy, maybe doing it later

* ernie

* fix roberta offloading

* camembert

* copy fixes

* bert generation + fixes on eager

* xlm roberta xl

* bridgetower (text) + seamlessv2 copy fixes

* rocbert + small fixes

* whoops

* small round of fixups

* NOTE: kernels didnt load with an earlier version, some fixup (needs another look bc cross deps)

* the end of the tunnel?

* fixup nllbmoe + style

* we dont need this anymore

* megatron bert is barely used, low prio skip for now

* Modernize bert (template for others)

NOTE: trying to push this through, might be overdue if not in time possible

* check inputs for all others (if checkmarked)

* fix bridgetower

* style

* fix encoder decoder (partially but cause found and fix also, just needs to be done for everything else)

* proper fix for bert to force intermediate dict outputs

* propagate to others

* style

* xlm roberta xl investigation, it's the layernorm...

* mobile bert

* revert this, might cause issues with composed models

* review

* style

* Remove [[autodoc]] refs to TF/Flax objects (#40996)

* remove refs

* more

* ENH: Enable readline support for transformers chat (#40911)

ENH Enable readline support for chat

This small change enables GNU readline support for the transformers chat
command. This includes, among others:

- advanced navigation and editing: ctrl + a ctrl + e alt + b alt + f
  ctrl + k alt + d etc.
- navigate and search history: arrow up/down ctrl + p ctrl + n  ctrl + r
- undo: ctrl + _
- clear screen: ctrl + l

Implementation

Although it may look strange, just importing readline is enough to
enable it in Python, see:

https://docs.python.org/3/library/functions.html#input

As readline is not available on some
platforms (https://docs.python.org/3/library/readline.html), the import
is guarded.

Readline should work on Linux, macOS, and WSL; I'm not sure about
Windows though. Ideally, someone can give it a try. It's possible that
Windows users would have to install
pyreadline (https://pypi.org/project/pyreadline3/).
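
A minimal, self-contained sketch of the guarded import described above (not the exact transformers chat code, just the mechanism):

```python
# Sketch only: importing readline is enough to give input() history and
# line-editing support; the import is guarded for platforms without it.
try:
    import readline  # noqa: F401
except ImportError:
    # e.g. Windows without the third-party pyreadline3 package installed
    readline = None

while True:
    try:
        line = input("chat> ")
    except EOFError:
        break
    if line.strip() in {"exit", "quit"}:
        break
    print(f"echo: {line}")
```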

* [testing] test `num_hidden_layers` being small in model tester (#40992)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* blt wip (#38579)

* blt wip

* cpu version

* cpu friendly with full entropy model (real time patching)

* adding config file instead of args file

* enable MPS

* refactoring unused code

* single config class in config file

* inherit from PreTrainedModel

* refactor LMTransformer --> BLTPatcher

* add conversion script

* load from new checkpoint with from_pretrained

* fixed demo from_pretrained

* clean up

* clean a few comments

* cleanup folder

* clean up dir

* cleaned up modeling further

* rename classes

* adding transformers Attention class and RotaryEmbedding class

* exchanged blt modules for transformers modules: attention, rotary_emb, create_causal_mask, etc

* separate out patcher config, update modeling and conversion script

* rename vars to be more transformers-like

* rm unused functions

* adding cross attention from transformers

* pass arg

* rename weights

* updated conversion script

* overwritten commit! fixing PR

* apply feedback

* adding BLTRMSNorm like Llama

* add repeat_kv and eager_attention_forward copied from

* BLTMLP identical to MllamaTextMLP

* clean up some args

* more like mllama, but busier inits

* BLTTransformerLayer config

* decoder, encoder, global configs

* wip working on modular file

* cleaning up patch and configs

* clean up patcher helpers

* clean up patcher helpers further

* clean up

* some config renaming

* clean up unused configs

* clean up configs

* clean up configs

* update modular

* clean

* update demo

* config more like mllama, separated subconfigs from subdicts

* read from config instead of self args

* update demo file

* model weights to causal lm weights

* missed file

* added tied weights keys

* BLTForCausalLM

* adding files after add-new-model-like

* update demo

* working on tests

* first running integration tests

* added integration tests

* adding tokenization tests, integration tests, and cleaned up tokenization file, + ruff

* tokenizer clean up

* modular file

* fixing rebase

* ruff

* adding correct basemodel output and updating config with checkpoint vals (for testing)

* BLTModelTests git status

* enabling inputs_embeds, although it won't be equal to input_ids since ids are needed for the patching logic

* fix sdpa == causal tests

* fix small model test and some gradient checkpointing

* skip training GC tests

* fix test

* updated modular

* update modular

* ruff

* adding modular + modeling

* modular

* more modern is_causal check

* cleaning up modular

* more modular reduction

* ruff

* modular fix

* fix styling

* return 2

* return 2

* fix some tests

* fix bltcrossattention after modular break

* some fixes / feedback

* try cache generate fix

* try cache generate fix

* fix generate tests

* attn_impl workaround

* refactoring to use recent TransformersKwargs changes

* fix hidden_states shape test

* refactor to new outputs

* simplify outputs a bit

* rm unneeded decoderlayer overwriting

* rename blt

* forgot tokenizer test renamed

* Reorder

* Reorder

* working on modular

* updates from modular

* new modular

* ruff and such

* update pretrainedmodel modular

* using cohere2 apply_rotary_pos_emb

* small changes

* apply feedback r2

* fix cross_attention

* apply more feedback

* update modeling fix

* load submodules from pretrainedmodel

* set initializer_range to subconfigs

* rm cross_attention_states pass when not needed

* add 7b projection layer support

* check repo

* make copies

* lost cohere2 rotate_half

* ruff

* copies?

* don't tie weights for submodules

* tie weights setting

* check docstrings

* apply feedback

* rebase

* rebased modeling

* update docs

* applying feedback

* few more fixes

* fix can_record_outputs

* fast tokenizer

* no more modulelist

* tok auto

* rm tokenizersss

* fix docs

* ruff

* fix after rebase

* fix test, configs are not subscriptable

---------

Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-30.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-103.ec2.internal>
Co-authored-by: Lysandre <hi@lysand.re>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-36.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-45.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-173-121.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-103.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-178.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-79.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-169-239.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-111.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-100.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-153.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-15.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-165-131.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-138.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-215.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-142.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-147.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-0.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-58.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-165-202.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-244.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-186.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-192.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-14.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-171-249.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-75.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-78.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-134.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-180.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-175-241.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-225.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-9.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-34.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-68.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-175.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-170-160.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-95.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-73.ec2.internal>

* [docs] rm stray tf/flax autodocs references (#40999)

rm tf references

* [`RMSNorm`] Fix rms norm init for models that center around 1 (#40796)

* fix

* fixup inits

* oops

* fixup gemma

* fixup modular order

* how does this keep happening lol

* vaultgemma is new i forgot

* remove init check

* Make `EfficientLoFTRModelTest` faster (#41000)

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fix typoes in src and tests (#40845)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix more dates in model cards and wrong modalities in _toctree.yml (#40955)

* Fix model cards and modalities in toctree

* fix new models

* RUFF fix on CI scripts (#40805)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* fix dict like init for ModelOutput (#41002)

* fix dict like init

* style

* 🚨 [v5] remove generate output retrocompatibility aliases (#40998)

remove old type aliases

* [tests] update `test_left_padding_compatibility` (and minimize overwrites) (#40980)

* update test (and overwrites)

* better test comment

* 0 as a default for

* Patch more `unittest.case.TestCase.assertXXX` methods (#41008)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* 🚨 [v5] remove deprecated entry point (#40997)

* remove old entry point

* update references to transformers-cli

* 🚨 [lightglue] fix: matches order changed because of early stopped indices (#40859)

* fix: bug that made early stop change order of matches

* fix: applied code suggestion

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* fix: applied code suggestion to modular

* fix: integration tests

---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* Fix `PhimoeIntegrationTest` (#41007)

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fix Glm4v test (#41011)

fix

* Update after #41007 (#41014)

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fix benchmark runner argument name (#41012)

* Adding support for Qwen3Omni (#41025)

* Add Qwen3Omni

* make fix-copies, import properly

* nit

* fix wrong setup. Why was audio_token_id renamed ?

* upds

* more processing fixes

* yup

* fix more generation tests

* down to 1?

* fix import issue

* style, update check repo

* up

* fix quality at my best

* final quality?

* fix doc building

* FINAL COMMIT: SKIP IMPORTANT BUT FAILING TESTS FOR MERGE

* SKIP THE TEMPLATE ONE

---------

Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>
Co-authored-by: Arthur <arthur.zucker@gmail.com>

* Making compute_loss_func always take priority in Trainer (#40632)

* logger warn, if-else logic improved

* redundant if condition fix

* Modify Qwen3Omni parameter name since VL changed it (#41045)

Modify parameter name since VL changed it

Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>

* Fix Qwen video tests (#41049)

fix test

* [testing] Fix `qwen2_audio` (#41018)

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fix typing of tuples (#41028)

* Fix tuple typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove optax (#41030)

Remove optax dep

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typos in English/Chinese documentation (#41031)

* Fix typos and formatting in English docs

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typos and formatting in Chinese docs

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use torch.autocast (#40975)

* Use torch.autocast

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Format code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* docs: improved RoPE function Docstrings (#41004)

* docs: improved RoPE functuon docstrings

* Update src/transformers/modeling_rope_utils.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Fix condition for emitting warning when generation exceeds max model length (#40775)

correct warning when generation exceeds max model length

Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>

* Fix outdated torch version check (#40925)

Update torch minimum version check to 2.2

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove doc of tf and flax (#41029)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add Whole Word Masking and Padding Strategy to DataCollatorForLanguageModeling (#39485)

* Add whole word masking

* Vectorize whole word masking functions

* Unit test whole word masking

* Remove support for TF in whole word masking

* [testing] Fix `seed_oss` (#41052)

* fix

* fix

* fix

* fix

* fix

* fix

* Update tests/models/seed_oss/test_modeling_seed_oss.py

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Remove repeated import (#40937)

* Remove repeated import

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix conflict

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Simplify unnecessary Optional typing (#40839)

Remove Optional

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add write token for uploading benchmark results to the Hub (#41047)

* Separate write token for Hub upload

* Address review comments

* Address review comments

* Ci utils (#40978)

* Add CI reports dir to gitignore

* Add utils to run local CI

* Review compliance

* Style

* License

* Remove <frameworkcontent> and <pt> tags from documentation (#41055)

* Remove <frameworkcontent> and <pt> tags

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert changes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update docs/source/en/model_doc/madlad-400.md

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Fix CI jobs being all red 🔴 (false positive) (#41059)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Update quantization CI (#41068)

* fix

* new everything

* fix

* [i18n-bn] Add Bengali language README file (#40935)

* [i18n-bn] Add Bengali language README file and update links in existing language files

* Update Bengali README for clarity and consistency in model descriptions

* Improve documentation and errors in Mamba2-based models (#41063)

* fix bug in Mamba2 docs

* correct 'because on of' issue

* link to other Mamba2 model types

* github URL is not changed

* update error message in generated files

* Update team member list for some CI workflows (#41094)

* update list

* update list

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* fix crash when using chat to send 2+ request to gptoss (#40536)

Signed-off-by: Wang, Yi <yi.a.wang@intel.com>

* Minor addition, no split modules for VideoMAEE (#41051)

* added no split modules

* fixed typo

---------

Co-authored-by: Raushan Turganbay <raushan@huggingface.co>

* Switch to `python:3.10-slim` for CircleCI docker images (#41067)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fix argument name in benchmarking script (#41086)

* Fix argument name in benchmarking script

* Adjust vars

* Remove mention of TensorFlow/Flax/JAX from English documentation (#41058)

Remove mention of TensorFlow from English documentation

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typos in documentation (#41087)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typing (#40788)

* Fix optional typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix optional typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix schema typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typing

* Fix typing

* Fix typing

* Fix typing

* Use np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Format code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix quote string of np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix code

* Format

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove unused arguments (#40916)

* Fix unused arguments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove tf and flax from Chinese documentation (#41057)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* fix wrong height and width when read video use torchvision (#41091)

* docs: Fix Tool Use links and remove dead RAG links (#41104)

docs: Fix tool use links. Remove dead RAG links. Fix style

* 🚨 [generate] update paligemma mask updates (and other assisted generation-related fixes) (#40917)

* tmp

* fix modular inheritance

* nit

* paligemma 1 doesn't have swa

* use same pattern as in models with hybrid layers

* PR comments

* helium also needs layer_typed (bc it relies on gemma)

* paligemma/gemma3: same mask creation fn in fwd and generate

* propagate changes to helium (gemma-based)

* tmp commit

* slow paligemma tests passing, let's see what breaks

* fix test_left_padding_compatibility

* tmp commit

* tmp commit

* rebase error

* docs

* reduce diff

* like this?

* t5gemma

* better comment

* shorter diff

* exception

* ffs type

* optional

* shorter modular_gemma.py

* helium model actually needs no changes -- the tester is the issue

* t5gemma modular config

* a few more modular; paligemma BC

* fix processor issues?

* rm config exception

* lift warning in gemma

* [tests] gpt2 + `CausalLMModelTester` (#41003)

* tmp commit

* tmp commit

* tmp commit

* rm old GPT2ModelTester

* nit bug

* add facilities for encoder-decoder tests; add comments on ALL overwrites/extra fns

* vision_encoder_decoder

* Fix `_get_test_info` for inherited tests (#41106)

* fix _get_test_info

* fix patched

* add comment

* ruff

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Remove bad test skips (#41109)

* remove bad skips

* remove more

* fix inits

* Format empty lines and white space in markdown files. (#41100)

* Remove additional white space and empty lines from markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add empty lines around code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update ruff to 0.13.1 + target Python 3.10 + apply fixes (#37809)

Update ruff to 0.13.1 target it to Python 3.10 and apply its fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>

* 🚨 [V5] Remove deprecated training arguments  (#41017)

* Remove deprecated training arguments from V5

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove deprecated training arguments from V5

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix comments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Support loading LFM2 GGUF (#41111)

* add gguf config mapping for lfm2

* add lfm2 tensor process to unsqueeze conv weights

* adjust values from gguf config to HF config

* add test for lfm2 gguf

* ruff

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* [torchao safetensors] integrate torchao safetensors support with transformers  (#40735)

* enable torchao safetensors

* enable torchao safetensors support

* add more version checking

* [Qwen3-next] Fix dimension mismatch in torch_chunk_gated_delta_rule and torch_recurrent_gated_delta_rule (#40963) (#41036)

* fix mismatched dims for qwen3 next

* propagate changes

* chore: renamed tot_heads to total_sequence_length

* Apply suggestion from @vasqu

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* minor fix to modular qwen3 next file

---------

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Fix the error where a keyword argument appearing before *args (#41099)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix broken `` expressions in markdown files (#41113)

Fix broken expressions in markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove self-assignment (#41062)

* Remove self-assignment

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update src/transformers/integrations/flash_paged.py

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>

* Clear pass

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Clear pass

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Clear pass

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>

* 🚨Refactor: Update text2text generation pipelines to use max_new_tokens… (#40928)

* Refactor: Update text2text generation pipelines to use max_new_tokens and resolve max_length warning

* docs(text2text_generation): update parameter comments to reflect modern generation practices

Updated the max_length parameter comment to max_new_tokens, in line with the modern generation practice of specifying the number of new tokens to generate

* refactor(text2text_generation): Remove outdated input validation logic

* docs(text2text_generation): Revert incorrectly modified comment

* docs(text2text_generation): Revert incorrectly modified comment

* Fixed MXFP4 model storage issue (#41118)

* Fixed loading LongT5 from legacy checkpoints (#40724)

* Fixed loading LongT5 from legacy checkpoints

* Adapted the fix to work with missing lm_head

* dummy commit (#41133)

* dummy commit, nothing interesting

* dummy commit, nothing interesting

* dummy commit, nothing interesting

* dummy commit, nothing interesting

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

* Fix loading logic flaw with regards to unexpected and missing keys (#40850)

* Unexpected keys should be ignored at load with device map

* remove them all

* fix logic flaw

* fix

* simplify

* style

* fix

* revert caching allocator change

* add other test

* add nice doc

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>

* Using torch.distributions.Categorical

* Resolving logits_process.py Issues

* style: autoformat with make fixup

* Update logits_process.py removed defaults

* Variable H name -> cumulative_entropy

* Resolving format error

* Correction of the loop variables in logit processor

* Vectorized the loop in logits_process

* formatted  logits_process

* paper reference and stopping rule comment logits_process

* Trigger CI rerun

* Update logits_process.py

* added test_TopH_example_integration

* added test_TopH_example_integration

* Update README.md

* Restore CI config to match main (remove accidental changes)

* Restore CI config to match upstream main (no diffs)

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Signed-off-by: greg-kwasniewski1 <213329731+greg-kwasniewski1@users.noreply.github.com>
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: Wang, Yi <yi.a.wang@intel.com>
Co-authored-by: ArminAzizi98 <147081650+ArminAzizi98@users.noreply.github.com>
Co-authored-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Co-authored-by: Yuchao Zhang <418121364@qq.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
Co-authored-by: Bo Zheng <368586905@qq.com>
Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
Co-authored-by: Rémi Ouazan <83456801+remi-or@users.noreply.github.com>
Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
Co-authored-by: Ryan Mullins <ryanmullins@google.com>
Co-authored-by: Amer <amersinha@gmail.com>
Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Ákos Hadnagy <akos@ahadnagy.com>
Co-authored-by: Grzegorz Kwasniewski <213329731+greg-kwasniewski1@users.noreply.github.com>
Co-authored-by: NanoCode012 <nano@axolotl.ai>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
Co-authored-by: 艾力可 <178652170+thalahors@users.noreply.github.com>
Co-authored-by: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com>
Co-authored-by: Manuel de Prada Corral <6536835+manueldeprada@users.noreply.github.com>
Co-authored-by: Samuel Barry <127697809+SamuelBarryCS@users.noreply.github.com>
Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co>
Co-authored-by: HyunZ118 <156191095+HyunZ118@users.noreply.github.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>
Co-authored-by: Yijun Lee <119404328+yijun-lee@users.noreply.github.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com>
Co-authored-by: Shane A <shanea@allenai.org>
Co-authored-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
Co-authored-by: Raushan Turganbay <raushan@huggingface.co>
Co-authored-by: Aritra Roy Gosthipaty <aritra.born2fly@gmail.com>
Co-authored-by: vb <vaibhavs10@gmail.com>
Co-authored-by: Yaswanth Gali <82788246+yaswanth19@users.noreply.github.com>
Co-authored-by: Akshay Babbar <priv.akshay@outlook.com>
Co-authored-by: liangel-02 <liangel@meta.com>
Co-authored-by: Duc-Viet Hoang <vietyb00@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: lilin-1 <256404019@qq.com>
Co-authored-by: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>
Co-authored-by: Jack <32371937+jackzhxng@users.noreply.github.com>
Co-authored-by: Rangehow <88258534+rangehow@users.noreply.github.com>
Co-authored-by: rangehow <rangehow@foxmail.com>
Co-authored-by: Anna <anna@liquid.ai>
Co-authored-by: Anna Banaszak <48625325+ankke@users.noreply.github.com>
Co-authored-by: Hamish Scott <41787553+hamishs@users.noreply.github.com>
Co-authored-by: Harshal Janjani <75426551+harshaljanjani@users.noreply.github.com>
Co-authored-by: Branden <brandenkmurray@gmail.com>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-27-253.ec2.internal>
Co-authored-by: Benjamin Bossan <BenjaminBossan@users.noreply.github.com>
Co-authored-by: Ita Zaporozhets <31893021+itazap@users.noreply.github.com>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-30.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-103.ec2.internal>
Co-authored-by: Lysandre <hi@lysand.re>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-36.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-45.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-173-121.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-103.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-178.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-79.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-169-239.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-111.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-100.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-153.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-15.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-165-131.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-138.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-215.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-142.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-147.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-0.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-58.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-165-202.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-244.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-186.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-192.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-14.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-171-249.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-75.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-78.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-134.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-180.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-175-241.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-225.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-9.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-34.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-68.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-175.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-170-160.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-95.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-73.ec2.internal>
Co-authored-by: StevenBucaille <steven.bucaille@gmail.com>
Co-authored-by: BakerBunker <17872844+BakerBunker@users.noreply.github.com>
Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>
Co-authored-by: Arthur <arthur.zucker@gmail.com>
Co-authored-by: Ayush <ayushtanwar1729@gmail.com>
Co-authored-by: Ryan Mullins <ryan@ryanmullins.org>
Co-authored-by: Yannick Schnider <Yannick.Schnider1@ibm.com>
Co-authored-by: Ralph Gleaton <70818603+rjgleaton@users.noreply.github.com>
Co-authored-by: Saidur Rahman Pulok <59414463+saidurpulok@users.noreply.github.com>
Co-authored-by: Nick Doiron <ndoiron@mapmeld.com>
Co-authored-by: Wang, Yi <yi.a.wang@intel.com>
Co-authored-by: Duygu Altinok <duygu.altinok12@gmail.com>
Co-authored-by: Jinde.Song <juude.song@gmail.com>
Co-authored-by: hbenoit <60629420+HaroldBenoit@users.noreply.github.com>
Co-authored-by: nnul <107971634+notkisk@users.noreply.github.com>
Co-authored-by: YangKai0616 <kai.yang@intel.com>
Co-authored-by: Karol Szustakowski <61427290+Szustarol@users.noreply.github.com>
Co-authored-by: souvikku <107592858+souvikku@users.noreply.github.com>
2025-10-08 13:37:51 +00:00
e064dc05c2 [testing] Fix JetMoeIntegrationTest (#41377)
* fix

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-08 13:11:53 +00:00
20282f13fa [JetMoe] Fix KV head repetition and padding free (#41423)
fix jetmoe
2025-10-08 14:27:22 +02:00
c528f50663 Remove Python 3.9 classifier (#41410)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-08 12:20:36 +00:00
8dfc8e8cfc 🤦 CB nit! (#41413)
* 🤦

* updates

* update cb simple

* merge

* up

* update

* fix

* up

* nit

* rumble this is annoying

* update

* update

* up

* fix

* ....

* cleanup a bit

* nit

* typo

* typing and typo

* nit

* updates

* up

* final fix!

* update

* fix more import issues

* nuke is paged

* up
2025-10-08 13:36:27 +02:00
2166e26cb1 [torchao] Add regex support for ModuleFqnToConfig (#41242)
* Add regex support for ModuleFqnToConfig

Summary:
Similar to https://github.com/pytorch/ao/pull/3084 we added regex support
in transformers so people can use regex to quantize the models.

See https://github.com/pytorch/ao/pull/3084 for docs and precedence of different
configurations

Uploaded model: https://huggingface.co/torchao-testing/opt-125m-ModuleFqnToConfig-v1-regex-0.14.0.dev

Test Plan:
pytest tests/quantization/torchao_integration/test_torchao.py -k test_module_fqn_to_config_regex

Reviewers:

Subscribers:

Tasks:

Tags:

* Apply style fixes

* add assert for

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-10-08 11:05:15 +00:00
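For illustration only (plain Python, not the torchao `ModuleFqnToConfig` API; the `re:` key prefix is an assumed convention), the idea of resolving exact and regex keys against module FQNs, with exact names taking precedence, looks roughly like this:

```python
import re

# Hypothetical mapping: exact FQNs take precedence over regex-style keys.
fqn_to_config = {
    "model.decoder.layers.0.self_attn.q_proj": "int8-exact",
    r"re:model\.decoder\.layers\.\d+\.self_attn\..*": "int8-regex",  # assumed "re:" prefix
}

def resolve_config(fqn: str):
    if fqn in fqn_to_config:                    # 1. exact match wins
        return fqn_to_config[fqn]
    for key, cfg in fqn_to_config.items():      # 2. fall back to regex keys
        if key.startswith("re:") and re.fullmatch(key[3:], fqn):
            return cfg
    return None                                 # 3. leave the module unquantized

print(resolve_config("model.decoder.layers.0.self_attn.q_proj"))  # int8-exact
print(resolve_config("model.decoder.layers.7.self_attn.k_proj"))  # int8-regex
```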
b13ee63b5a enable new model uts to xpu and fix some failures on xpu (#41386)
* enable new model uts to xpu and fix some failures on xpu

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* add more

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* Update test_modeling_internvl.py

* Update test_modeling_llava.py

* Update test_modeling_qwen2_5_omni.py

* Update test_modeling_llava_next_video.py

* Update test_modeling_qwen3.py

* Update test_modeling_whisper.py

* Update test_modeling_whisper.py

* Update test_modeling_llava.py

* Update test_modeling_llava.py

* Update test_modeling_qwen2_5_omni.py

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-10-08 10:14:50 +00:00
1c5ac899e8 Use accelerator API to free device memory (#41195)
* Use accelerator API to free device memory

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use clear_device_cache

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Cleanup

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Cleanup

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-08 12:11:18 +02:00
957b1f3696 Fixing comments in __init__ file (#41414)
nit
2025-10-08 12:07:26 +02:00
13791d8f48 [v5] Bump min version of bitsandbytes to 0.46.1 (#41283)
* bump bitsandbytes to 0.46.1

* huge cleanup

* style

* fix

* req

* fix

* importerror

* fix
2025-10-08 12:04:26 +02:00
7e475552be 🚨 [v5] Prune prune_heads (#41417)
* remove _prune_heads

* remove prune_heads

* finalize the purge

* remove another patterns
2025-10-08 10:25:13 +01:00
46db0edf3b 🚨🚨 Remove all traces of legacy cache format (#41378)
* remove

* more

* add back

* tests

* revert classes

* tests

* add exceptions

* reapply modular

* rename

* oupsi

* start with whisper

* fix tests

* fix

* fix

* fix

* typing
2025-10-08 11:14:44 +02:00
ee5488440b Tiny Cleanup - Removed duplicate class field definitions (#41293)
* Removed duplicate-class-field-definition's using RUFF PIE794

* Removed duplicate-class-field-definition's using RUFF PIE794

* Ruff format.

* Removed duplicate-class-field-definition

* Added New ruff rule to detect duplicate class field defs

* remove comment

* order

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-08 10:49:34 +02:00
34dcd73b57 v5 dev version (#41436) 2025-10-08 10:45:33 +02:00
3553f0bc23 Fix overriding common_kwargs defaults in processor calls (#41381)
* set common_kwargs defaults before updating with kwargs

* change order to override defaults common_kwargs
2025-10-07 23:13:56 -04:00
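The ordering fix above boils down to merging the defaults before the caller-supplied values; a generic sketch (hypothetical keys, not the actual processor internals):

```python
defaults = {"return_tensors": "pt", "padding": False}  # common_kwargs defaults
user_kwargs = {"padding": True}                        # passed at call time

# Wrong order: defaults applied last clobber what the caller asked for.
broken = {**user_kwargs, **defaults}   # padding stays False

# Fixed order: start from the defaults, then let the caller override them.
merged = {**defaults, **user_kwargs}   # padding becomes True
print(broken, merged)
```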
242eb9cbdc Remove deprecation warning (#41425)
* remove

* fix space
2025-10-07 19:21:14 +02:00
50090c3fc8 [v5] Delete left traces of feature extractor (#41321)
delete the left traces
2025-10-07 18:24:08 +02:00
ccbaa1670a Fix incorrect assignment in update_device_map for GPTQ quantizer (#41328)
Fix incorrect assignment in update_device_map for GPTQ quantizer

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-10-07 17:28:55 +02:00
c562c5d801 [v5] Bump accelerate to 1.1.0 (#41234)
* bump to 1.1.0 !

* bump accelerate

* fix

* None

* fixed !

* style
2025-10-07 17:18:32 +02:00
88e946e062 Fix early CUDA initialisation (#41409)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-07 14:37:17 +01:00
93464a0279 Prefer raising TypeError exception for invalid type (#41346)
* Fixed raising of TypeError exception for invalid type

* Fixed failing tests.
2025-10-07 13:11:42 +00:00
0c9a72e457 [Model] Lfm2Moe (#41401)
* [new-models] LFM2-MoE

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [docs] add in template lfm2_moe doc files

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [configuration] update configuration class

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modular][lfm] minor: fix rotary_emb typo

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modeling] modular/modeling files for Lfm2Moe

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modeling][lfm2_moe] fix Lfm2Moe modular/modeling

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [configuration][lfm2_moe] update configuration keys with latest config changes

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [misc] make fixup

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modular][lfm2_moe] address comments: dtype, mlp, buffers

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [configuration][lfm2_moe] add initializer_range

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modular][lfm2_moe] include init_weights to pass test_initialization

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [tests][causal_lm] include pos_emb as possible rope attribute

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modeling][lfm2_moe] remove load_balancing_loss_func due to lack of support for hooking expert biases

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [misc] make style

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [modeling][lfm2_moe] MoE refactor PR update in LFM2Moe

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [tests] lfm2_moe: unit tests

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [misc] update LFM2-8B-A1B repo id

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [tests] lfm2: update ModelTests for lfm2

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* Update LFM2 documentation

Updated the LFM2 documentation to reflect the addition of a new model size and clarified architectural details.

* Add Lfm2Moe documentation

Add Lfm2Moe model documentation with overview and example usage.

* [misc] fix ci

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [docs] remove trust_remote_code

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [misc] ci: fix modular

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* reapply modular

* simplify

* remove static address and inplace op

* simplify

* simplify a bit more the modular

* imports

---------

Signed-off-by: Paul Pak <paulpak58@gmail.com>
Co-authored-by: Maxime Labonne <81252890+mlabonne@users.noreply.github.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-07 15:09:58 +02:00
b4428d545f Fix test for model with dotted name and relative imports (#41343) 2025-10-07 13:55:54 +01:00
0464d9eb37 [Cache] lfm2 cache: allocate empty kv layers during init (#41396)
* [Cache] lfm2 cache: allocate empty kv layers during init

Signed-off-by: Paul Pak <paulpak58@gmail.com>

* [Cache] lfm2_cache: update modular file

Signed-off-by: Paul Pak <paulpak58@gmail.com>

---------

Signed-off-by: Paul Pak <paulpak58@gmail.com>
2025-10-07 14:01:31 +02:00
da7b8ce11f [kernels] Kernel Config (#41232)
* first config

* add kernel_config

* add import logic

* fixing style

* compare class name

* add comments

* rm import

* adding kernel md files

* add to toctree

* adding to main_classes

* simplify required config

* add to doc

* style

* store the mapping

* remove nested func

* add hub mixin

* fix

* imports

* fix
2025-10-07 13:58:20 +02:00
4763b8c5b8 Correct numerical regression in vision embeddings (#41374)
created modeling file
2025-10-07 13:43:24 +02:00
caa14e7dab fix resample in asr pipeline (#41298) 2025-10-06 17:31:10 +00:00
73f8c4b8ad fix asr ut failures (#41332)
Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-10-06 17:12:19 +00:00
57e82745f9 [v5] Sync Bert and Bart eager attention (#41248)
* remove from modeling files

* remaining changes

* style / copies

* revert deprecated models and fixup some models

* oops

* sync attn impl

* fix style/copies

* fix distilbert

* remove dim check
2025-10-06 18:49:01 +02:00
505387c05b Update from pretrained error when loading (#33380)
* init commit

* style

* take comments into account

* merge with main and simplify

* nits

* final

* small fixes

* fix

* super small update!

* add another test

* up up

* update

* fixes

* sort them by default
2025-10-06 16:10:19 +00:00
e00f46f16e serve: add non-streaming mode to /v1/responses; stream event parity; remove placeholder logprobs (#41353) 2025-10-06 16:04:17 +00:00
0395ed52ae [CB] Refactors the way we access paged (#41370)
* up

* refactor the way we handle paged attention

* affect serve as well

* update

* fix

* cup
2025-10-06 17:55:31 +02:00
39b0c9491b Remove unused function patameters (#41358)
Remove unused arguments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-06 15:38:17 +00:00
11e4b5e5ee make some ut cases pass on xpu w/ latest torch (#41337)
* make some ut cases pass on xpu w/ latest torch

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* Update test_modeling_llava_onevision.py

* Apply style fixes

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-10-06 15:38:00 +00:00
fa36c973fc Remove unnecessary list comprehension (#41305)
Remove unnecessary comprehension

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-06 14:49:02 +00:00
7a1aeec36e Fixes in check_model_inputs, GPTBigCodeModel and ImageGPTModel (#40811)
* misc fixes

* fix

* Update src/transformers/models/imagegpt/modeling_imagegpt.py

* Apply suggestion from @IlyasMoutawwakil

* pickup use_cache from args input as well

* fix
2025-10-06 16:34:24 +02:00
297a41a6cf Use canonical get_size_with_aspect_ratio (with max_size) from transformers.image_transforms to fix #37939 (#41284)
* Use canonical get_size_with_aspect_ratio (with max_size) from transformers.image_transforms to fix #37939

* Fix import sorting/style

* Fix import order

* Refactor: use canonical get_size_with_aspect_ratio across image processors (except YOLOS)

This commit updates image processing utilities in multiple model processors to use the shared
transformers.image_transforms.get_size_with_aspect_ratio for consistent resizing logic and
aspect ratio handling.

YOLOS processors are intentionally left unchanged in this commit to preserve their current
behavior and avoid breaking model-specific padding/resizing assumptions. YOLOS will be updated
in a dedicated follow-up PR once compatibility is fully verified.

* ruff fixes

* Fix check_copies.py references for get_size_with_aspect_ratio to use canonical transformers.image_transforms version

---------

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
2025-10-06 10:15:56 -04:00
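A hedged usage example of the canonical helper referenced above, assuming it keeps the `(image_size, size, max_size=None)` signature currently found in `transformers.image_transforms`:

```python
from transformers.image_transforms import get_size_with_aspect_ratio

original_size = (480, 640)  # (height, width) of the input image

# Shorter edge is scaled towards `size`, longer edge is scaled proportionally
# and capped at `max_size`, preserving the aspect ratio.
new_height, new_width = get_size_with_aspect_ratio(original_size, size=800, max_size=1333)
print(new_height, new_width)
```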
ae60c77689 Fix flash_attention.py: wrong argument passing for attn_implementation (#41347)
* Fix flash_attention.py: wrong argument passing for attn_implementation

The name of the attn type argument for `_flash_attention_forward()` should be `implementation`, instead of `attn_implementation` which currently uses in the function call. This would result in wrong type specification.

* modify the kwargs inside _flash_attention_forward

* fix the doc

* fix typo

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-06 15:36:40 +02:00
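A stand-in sketch of the keyword mismatch described above; the real `_flash_attention_forward` lives inside transformers and takes many more parameters, so this stub only illustrates why the wrong keyword is silently swallowed:

```python
def _flash_attention_forward(query, key, value, attention_mask=None, *, implementation=None, **kwargs):
    # Stand-in for the real dispatcher: only `implementation` selects the backend.
    print(f"dispatching with implementation={implementation!r}, ignored kwargs={sorted(kwargs)}")

# Buggy call: `attn_implementation` falls into **kwargs, so no backend is selected.
_flash_attention_forward(None, None, None, attn_implementation="flash_attention_2")

# Fixed call: the keyword matches the actual parameter name.
_flash_attention_forward(None, None, None, implementation="flash_attention_2")
```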
6bf6e36d3b [testing] update test_longcat_generation_cpu (#41368)
* fix

* Update tests/models/longcat_flash/test_modeling_longcat_flash.py

Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com>

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Pablo Montalvo <39954772+molbap@users.noreply.github.com>
2025-10-06 13:21:29 +00:00
4903cd4087 🚨 Remove BetterTransformer (#41367)
remove
2025-10-06 15:18:12 +02:00
a5700c497e Better typehints for apply_chat_template (#41355) 2025-10-06 13:14:03 +00:00
089d573aca Fix typo in model proposal template (#41352) 2025-10-06 13:06:50 +00:00
c27b67f0cd 🚨 [v5] Remove relative position embeddings (for bert like models) (#41170)
* remove from modeling files

* remaining changes

* style / copies

* revert deprecated models and fixup some models

* oops
2025-10-06 14:21:41 +02:00
a89bdcf5f1 Fixing a typo for BLT model (#41325) 2025-10-06 12:16:45 +00:00
0452f28544 [ModularChecker] QOL for the modular checker (#41361)
* update

* fancy table fancy prints

* download to cache folder, never need it ever again

* style

* update based on review
2025-10-06 12:52:10 +02:00
9db58abd6e Check model inputs - hidden states (#40994)
* update all models

* fix copies

* skip aria tests

* update other models

* skip should be in test, not tester

* i think this is more descriptive as a name

* find and replace for new models
2025-10-06 11:48:52 +02:00
db711210d2 Fix trainer for py3.9 (#41359)
fix
2025-10-06 11:36:05 +02:00
163601c619 Standardize PretrainedConfig to PreTrainedConfig (#41300)
* replace

* add metaclass for full BC

* doc

* consistency

* update deprecation message

* revert
2025-10-06 11:34:02 +02:00
55b172b8eb 🚨 Bump to Python 3.10 and rework how we check 3rd-party libraries existence (#41268)
* cleanup

* add check

* fix

* remove all global variables

* fix

* add lru caches everywhere

* fix

* fix

* style

* improve

* reorder all functions

* fix order

* improve

* fix

* fix

* fix
2025-10-06 11:04:19 +02:00
1ec0b54414 Rope for Qwen2--5-vl (#41173)
qwen2--5-vl
2025-10-06 10:56:29 +02:00
0947b9042c Fixed tiny incorrect import in gemma3 (#41354)
Fixed tiny import issue in gemma3
2025-10-06 10:55:42 +02:00
e11a00a16f JetMoe Fix jetmoe after #40132 (#41324)
* update

* up
2025-10-04 11:02:13 +02:00
1bc75db9bd Fix lr_scheduler_parsing (#41322)
* fix

* fix
2025-10-03 17:51:17 +02:00
c2b3cc3e64 Fix jamba (#41309)
* reactivate tests

* first pass

* fix

* fix bias

* fix and simplify

* finally fix this stupid bug

* add skips

* remove bad stuff

* fix copies

* simplify
2025-10-03 16:54:19 +02:00
5abfa43f02 Security/fuyu (#41320)
remove reference to compromised repo
2025-10-03 14:13:41 +00:00
217ff1e4ef AutoAWQ tests (#41295)
* initial commit

* fix

* fix multi gpu

* fix expected output

* fix

* latest

* add comment

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-10-03 15:17:10 +02:00
5339f72b9b 🚨 [unbloating] unify TypedDict usage in processing (#40931)
* just squash commits into one

* fix style
2025-10-03 14:17:59 +02:00
42bcc81ba2 Minor security fix for ssh-runner.yml (#41317)
security issue

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-03 14:14:34 +02:00
cd4422922e Add modular detector (#41289)
* doc

* doc

* no remote code

* safe-ize the release + remove remote

* fixes

* add some documentation as well
2025-10-03 14:11:10 +02:00
59eba49237 download and use HF Hub Cache (#41181)
use hub cache

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-03 11:11:37 +02:00
de3ee737cf Fix README.md error when installing from source (#41303) 2025-10-02 16:08:27 -07:00
b914445f77 Italian translation for README.md (#41269)
chore: add Italian translation for README.md
2025-10-02 15:59:28 -07:00
41e5abac5c FIX: Bug in PEFT integration delete_adapter method (#41252)
The main content of this PR is to fix a bug in the delete_adapter method
of the PeftAdapterMixin. Previously, it did not take into account
auxiliary modules from PEFT, e.g. those added by modules_to_save. This
PR fixes this oversight.

Note that the PR uses a new functionality from PEFT that exposes
integration functions like delete_adapter. Those will be contained in
the next PEFT release, 0.18.0 (yet unreleased). Therefore, the bug is
only fixed when users have a PEFT version fulfilling this requirement.
I ensured that with old PEFT versions, the integration still works the
same as previously. The newly added test for this is skipped if the PEFT
version is too low.

(Note: I tested locally that the test will pass with PEFT 0.18.0.)

While working on this, I also cleaned up the following:

- The active_adapter property has been deprecated for more than 2 years
  (#26407). It is safe to remove it now.
- There were numerous small errors or outdated pieces of information in
  the docstrings, which have been addressed.

When PEFT < 0.18.0 is used, although we cannot delete modules_to_save,
we can still detect them and warn about it.
2025-10-02 18:36:57 +02:00
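The fix above is gated on the installed PEFT version; a rough sketch of that gating pattern follows. The minimum version comes from the description above, but the helper name and the exact fallback message are illustrative.

```python
import importlib.metadata
import warnings

from packaging import version

MIN_PEFT_FOR_FULL_DELETE = version.parse("0.18.0")  # version named in the description above

def peft_supports_full_delete() -> bool:
    """True if the installed PEFT can also remove auxiliary modules (e.g. modules_to_save)."""
    try:
        installed = version.parse(importlib.metadata.version("peft"))
    except importlib.metadata.PackageNotFoundError:
        return False
    return installed >= MIN_PEFT_FOR_FULL_DELETE

# Call-site sketch: keep the old behaviour on old PEFT, but tell the user what is left behind.
if not peft_supports_full_delete():
    warnings.warn(
        "PEFT < 0.18.0: auxiliary modules added by modules_to_save cannot be deleted; "
        "they are only detected and warned about."
    )
```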
da3c7d1d36 🚨 [DistilBert] Refactor Attention (#41163)
* refactor

* allow pos ids for flattened sequences
2025-10-02 17:50:48 +02:00
e54defcfc2 [Flex Attn] Fix lse x attention sinks logic (#41249)
fix
2025-10-02 17:49:39 +02:00
b3bd815786 Fix mxfp4 dequantization (#41292)
fix
2025-10-02 16:47:42 +02:00
e4930d6bde 🚨 [V5] Remove deprecated resume_download (#41122)
Remove deprecated `resume_download`

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-10-02 16:44:34 +02:00
7adb43e60a Build doc in 2 jobs: en and other languages (#41290)
* separate

* separate

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-02 14:33:57 +00:00
e1f1d32af0 Remove some previous team members from allow list of triggering Github Actions (#41263)
* delete

* delete

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-10-02 16:32:28 +02:00
1d7ebff398 Fix - remove deprecated args checking in deepspeed integrations (#41282)
Remove deprecated args checking in deepspeed integrations

Signed-off-by: nguyen599 <pnvmanh2123@gmail.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-10-02 13:59:50 +00:00
9d02602f0f Remove test_initialization (#41261)
remove it
2025-10-02 15:23:43 +02:00
248e7ef8bc [docs] remove references to recently deleted classes in non-en docs (onnx, feature processors) (#41286)
remove references to old classes
2025-10-02 12:59:28 +00:00
bc33fd3fc2 Add processor and intergration test for qwen3vl (#41277)
* support aux loss in qwen3vlmoe

* update qwen3vl processor test!

* add integration tests for qwen3vl-30a3

* remove duplicated decorator

* code clean

* fix consistency

* do not inherit from nn.Linear for better quantization

* pass check
2025-10-02 14:59:04 +02:00
639ad8ccd9 feat: use aws-highcpu-32-priv for amd docker img build (#41285)
* feat: use `aws-highcpu-32-priv` for amd docker img build

* feat: add `workflow_dispatch` event to docker build CI
2025-10-02 12:53:14 +00:00
894a2bdd8c Fix pylint generator warnings (#41258)
Fix pylint generator warnings

Signed-off-by: cyy <cyyever@outlook.com>
2025-10-02 12:35:42 +00:00
1cc9069551 Fix unnecessary single-item container checks (#41279)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-02 12:35:11 +00:00
4f286fbbf8 Biogptlogits (#41270)
added logits slicing to BioGpt for seq classifier

Signed-off-by: Aviral <aviralkamaljain@gmail.com>
2025-10-02 12:33:48 +00:00
1d91a8a454 Use max/min (#41280)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-02 12:15:27 +00:00
f1b64c5b06 Unify is_torchvision_v2_available with is_torchvision_available (#41259)
Fix is_torchvision_v2_available

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-02 11:56:37 +00:00
2f3e266692 fix async client for transformers chat (#41255)
* fix-client

* fix
2025-10-02 13:23:37 +02:00
313504bcdd 🚨 [v5] remove deprecated generate classes (constraints and beam scorers) (#41223)
rm
2025-10-02 12:11:11 +01:00
8f14300663 Allow private Space id for Trackio (#40948)
* allow private space id for trackio

* complete docstring
2025-10-02 12:38:25 +02:00
734732140a Deprecate Trackio environment variables and deploy to Spaces by default (#40950)
* allow private space id for trackio

* complete docstring

* Deprecate environment variables for Trackio integration; use TrainingArguments instead and deploy by default

* style

* Enhance documentation for Trackio Space ID in TrainingArguments
2025-10-02 12:37:55 +02:00
7938e91faa MoE + vllm = 😻 (#40132)
* update modeling mixtral

* oups

* fix

* better naming?

* compute softmax and top_k inside the experts

* update minimax as well

* models that will need an update

* more models that need a fix

* stash

* fix mixtral

* update olmoe

* update

* update

* current changes

* nits

* molmoe is now fixed

* olmoe is good to go!

* refactor qwen2_moe

* fixes

* fixed moe

* fix qwen2 modular

* nit

* qwen2_moe test script works

* tricky rope !

* fix qwen3

* DeepSeek v3 MoE Standardization (#40538)

* DeepSeek-v3

Shared

Shared

* Dependents of DS3

* Standardize GLM4V MoE (#40539)

* up

* Standardize VitPose's MoE (#40549)

* VitPose

* outside

* outside

* outside

* fix

* update dbrx

* dbrx... the magix

* Refactor Ernie 4.5's MoE (#40547)

* Isolate Ernie fixes

* fix moe

---------

Co-authored-by: Vasqu <antonprogamer@gmail.com>

* fix style

* style

* fix copies

* style

* latest changes

* fixes

* had to stage

* current updaters

* up

* another modular

* modular graniteMoe

* some update

* draft another modular moe

* updaters

* up

* fix nit

* q3 nit

* fix phi moe

* we're going up up up up its our mooooment

* fix switch transformers this time around

* up

* gptsan japanese is deprecated forget about it

* fix mixtral to not be a linear (gives us more freedom)

* update

* fix copies gone wrong try catch nothing

* fix mixtral

* new refactor again

* update aria as well

* up dbrx and deepseekv3

* nit

* fix phimoe?

* fix deepseek v3

* nits

* don't bother with this one please

* up olmoe

* ??

* fix olmoe

* yups

* fixup

* ish

* hot patch

* new qwen3

* updates

* up

* nit

* fix copies

* fix

* nits

* we're going up up up

* nits

* switch_transformers edge case

* lol modular gptsan?

* fix deepseek

* finally all modeling match modular

* update

* up

* up

* dang

* up

* up aria

* fix dbrx

* nits here and there

* finish fixing dbrx

* fix deepseek

* upd

* up

* fix flex olmo

* updated

* update jamba

* JAMBA is stil a bit todo

* forward forward

* fix dots11

* update

* fix hunyuan

* fix some other

* update phimoe

* fuck you phimoe you are now submitted

* submit granitemoe as well

* try to fix some other models, reduces some of the failures

* fix olmoe and qwem2moe

* up

* up

* fix qwen2_moe

* update modular make it again, simpler

* nits

* up

* up

* fix

* someswitch reductions

* up

* fix qwen3vl

* some fixes to jetmo

* these should be shipped to the modular to fix jetmoe

* fix most of the nllb failures

* more nllb fixes

* fix the modular

* remove nllb modular as it sucks for now

* ?

* fix granitemoe

* granitemoehybrid don't have rope

* use rope when rope, no rope when no rope

* updates

* finish fixing dumbgrainite

* fix most of minimax

* fix

* update modular

* ?

* up

* up jetmoe still broken

* up

* fix, now align the moe

* fix jetmoe

* fix styling and qwen3 repo consistency

* update

* up up

* update ruff?

* nits

* modeling is good now for switch

* fix

* more fixes to switch!

* fix some switch test

* ?

* ?

* up

* fix switch modular!

* nit?

* uip

* subtest

* can't believe I wasted so much time on this...

* fix

* updates

* nits

* nit jamba is fucking annoying

* ?

* fix?

* oups

* good good

* styling

* up

* make sure qwen2 sliding works!

* fix dbrx small

* lol

* nits

* fix one test

* fix load balancing loss issue

* fix jamba

* fix nllbmoe

* fix jamba consistency and doc?

* up

* these are correct

* up

* up

* up

* some of the final cleanup

* update

* up

* fix some revert in granimoe

* bring back attention multipliers for the granite family we'll see later on if they need removal

* small jamba fix docstring and typing

* fix phimoe

* yup

* fix unk returndict in granitemoes

* up

* fix qwen config

* fix phiemoe check quality

* nits

* update based on caught non relative imports!

* fix dbrx

* Apply suggestions from code review

Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>

* fix copies

* fixup

* fix dot1 regression!

* fix phimoe issue

* fix phi moe

* fix float() for some models

* fix jamba regression

* ui

* more dtype issues

* fix deepseek2 and 3?

* proper update

* fix modular deepseek!

* jamba jambaaaaaa

---------

Co-authored-by: Lysandre Debut <hi@lysand.re>
Co-authored-by: Vasqu <antonprogamer@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
2025-10-02 12:12:44 +02:00
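One detail called out in the MoE refactor above is computing the routing softmax and top-k selection inside the experts module rather than in the calling model. A minimal toy sketch of that layout follows; the sizes, names, and the dense per-expert loop are illustrative, not the actual Mixtral/OLMoE code.

```python
import torch
import torch.nn.functional as F
from torch import nn

class TinyMoE(nn.Module):
    def __init__(self, hidden: int = 16, num_experts: int = 4, top_k: int = 2):
        super().__init__()
        self.router = nn.Linear(hidden, num_experts, bias=False)
        self.experts = nn.ModuleList(nn.Linear(hidden, hidden) for _ in range(num_experts))
        self.top_k = top_k

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # softmax + top-k live here, next to the experts, instead of in the caller
        logits = self.router(x)                              # (tokens, num_experts)
        weights = F.softmax(logits, dim=-1)
        topk_w, topk_idx = torch.topk(weights, self.top_k, dim=-1)
        out = torch.zeros_like(x)
        for slot in range(self.top_k):
            idx = topk_idx[:, slot]
            w = topk_w[:, slot].unsqueeze(-1)
            for e, expert in enumerate(self.experts):
                mask = idx == e
                if mask.any():
                    out[mask] += w[mask] * expert(x[mask])
        return out

print(TinyMoE()(torch.randn(5, 16)).shape)  # torch.Size([5, 16])
```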
e6a8e7debe Fix binding of video frames to video placeholder in InternVL model (#41237)
* Fix binding video frames to video placeholder in prompt

Signed-off-by: Daniel Bershatsky <daniel.bershatsky@gmail.com>

* Add test on binding video frames to prompt

Signed-off-by: Daniel Bershatsky <daniel.bershatsky@gmail.com>

* Fix code style issues

Signed-off-by: Daniel Bershatsky <daniel.bershatsky@gmail.com>

* Fix broken tests on `InternVLProcessor`

Signed-off-by: Daniel Bershatsky <daniel.bershatsky@gmail.com>

* Add `return_tensors` to video processor defaults

Signed-off-by: Daniel Bershatsky <daniel.bershatsky@gmail.com>

---------

Signed-off-by: Daniel Bershatsky <daniel.bershatsky@gmail.com>
2025-10-02 09:43:35 +00:00
30b79effb5 Remove SageMakerTrainer (#41267)
* Remove SageMakerTrainer

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More removal

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-02 09:16:32 +00:00
aabf0a03cb Fix multi-video timestamp bug in Qwen-3-VL and GLM4V (#41229)
* fix multi-video timestamp bug in qwen3vl,glm4v

* run make fix-copies to sync modular files

* run make fix-copies to sync modular files

---------

Co-authored-by: UBT <daqin.luo@ubtrobot.com>
2025-10-02 11:15:57 +02:00
bcdd5532bf Use regex defailed flags (#41264)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-02 08:34:09 +00:00
55d63e86ea fix asr pipeline ut failures (#41275)
* fix asr pipeline ut failures

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* make style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-10-02 10:32:03 +02:00
522b79a346 add more activation kernels, follow up (#40944)
* add more activation kernels

* fixing style

* fix version
2025-10-02 08:45:05 +02:00
9f2d5666f8 docs: update bitsandbytes platform support (#41266) 2025-10-01 14:27:19 -04:00
9d8f693c7e add peft team members to issue/pr template (#41262)
* add

* Update .github/PULL_REQUEST_TEMPLATE.md

Co-authored-by: Benjamin Bossan <BenjaminBossan@users.noreply.github.com>

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Benjamin Bossan <BenjaminBossan@users.noreply.github.com>
2025-10-01 17:26:59 +00:00
94bbf8e199 Resolve remote custom module path warnings (#41243) 2025-10-01 15:55:42 +00:00
c4b505d0f7 Don't convert to safetensors on the fly if the call is from testing (#41194)
* don't convert

* disable

* Update src/transformers/modeling_utils.py

Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>

* fix

* disable

* disable

* disable

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
2025-10-01 17:46:21 +02:00
01c9e1ba68 [t5gemma] fix get_text_config and related fixes (#40939)
* tmp commit

* t5gemma fixes
2025-10-01 15:55:26 +01:00
025531981c [FA3] Fix masking and loading logic in same process (#41217)
fix loading and fa3 masking
2025-10-01 16:36:12 +02:00
3256773974 FP-Quant NVFP4 and Python 3.9 support (#39876)
* quartet

* quartet qat -> quartet

* format

* bf16 backward

* interfaces

* forward_method

* quartet -> fp_quant

* style

* List -> list

* list typing

* fixed format and annotations

* test_fp_quant

* docstrings and default dtypes

* better docstring and removed noop checks

* docs

* pseudoquantization support to test on non-blackwell

* pseudoquant

* Pseudoquant docs

* Update docs/source/en/quantization/fp_quant.md

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* Update docs/source/en/quantization/fp_quant.md

* Update docs/source/en/quantization/fp_quant.md

* Update src/transformers/utils/quantization_config.py

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* Update tests/quantization/fp_quant_integration/test_fp_quant.py

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* Update tests/quantization/fp_quant_integration/test_fp_quant.py

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>

* small test fixes

* dockerfile update

* spec link

* removed `_process_model_after_weight_loading`

* toctree

* nvfp4

* nvfp4 tests

* FP-Quant version bumped

* nvfp4 default and docs update

* trainable

* cpu if pseudoquant

* proper group size selection

* gsr

* qutlass requirement version bump

* Upstream docker copy

* docs update

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-10-01 13:58:22 +00:00
d848a3953a Remove all instances of is_safetensors_available (#41233)
* safetensors is a core dep

* fix

* ok

* simplify branching

* keep it for now

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-10-01 13:57:28 +00:00
e4913bdf50 🚨 [v5] Remove SinkCache (#41107)
Remove SinkCache

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-01 13:46:55 +00:00
1c8f206ecc Fix pylint warnings (#41222)
* Remove unused variables

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove reimported packages

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix pylint warnings

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Simplify

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-01 13:16:22 +00:00
3016717f0d Use removeprefix and removesuffix (#41240)
* Use removeprefix and removesuffix

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-01 13:13:04 +00:00
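For reference, the idiom the cleanup above moves to (available since Python 3.9, so safe under the 3.10 floor adopted elsewhere in this log); the strings here are just an example.

```python
name = "model.layers.0.weight"

# Old pattern: slice by length, easy to get off-by-one wrong.
stripped_old = name[len("model."):] if name.startswith("model.") else name

# New pattern: a no-op when the prefix/suffix is absent.
stripped_new = name.removeprefix("model.").removesuffix(".weight")

print(stripped_old)  # layers.0.weight
print(stripped_new)  # layers.0
```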
ca975f1cb8 [V5] Remove deprecated transformers.onnx (#41214)
* Remove deprecated transformers.onnx

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove onnx docs

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-10-01 12:17:04 +00:00
1d1ac07893 [repo utils] Update models_to_deprecate.py (#41231)
* update models_to_deprecate

* exclude this file

* handle typos and aliases

* don't commit files

* PR suggestions; make fixup
2025-10-01 12:01:52 +00:00
bcec3e2175 fix TrainerIntegrationDeepSpeed UT failures (#41236)
Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-10-01 13:55:01 +02:00
ae879f67f8 🚨 [v5] Delete feature extractors used for vision (#41174)
* bye bye

* remove from docs

* do not use feature extractor here

* fix docs

* do not delete it

* forgot these
2025-10-01 13:20:58 +02:00
1c4d9982d3 Use math.log2 (#41241)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-01 09:52:31 +00:00
db1cc65c06 Video processor accepts single frames on cuda (#41218)
* fix

* why was it np if input is in torch
2025-10-01 10:55:11 +02:00
f22cb1e868 fix qwen text config (#41158)
* fix qwen text config

* fix tests

* fix one more test

* address comments
2025-09-30 17:23:44 +00:00
374ded5ea4 Fix white space in documentation (#41157)
* Fix white space

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert changes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix autodoc

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-30 09:41:03 -07:00
16a141765c [docs] Fix tp_plan (#41205)
remove manual
2025-09-30 09:27:50 -07:00
5d1e853032 [Trainer] deprecate num_train_tokens (#41165)
* dep

* fix

* fix
2025-09-30 15:53:16 +00:00
cecd92849e [v5] Remove train kwargs (#41127)
* rm train kwargs

* fix
2025-09-30 17:43:25 +02:00
103fa6d235 [v5] Remove deprecated prediction loop (#41123)
* rem deprecated

* more

* rm all instances of legacy arg
2025-09-30 17:43:01 +02:00
aa3e8798ba [v5] Remove tokenizer from Trainer (#41128)
* tokenizer deprecated

* style

* forgot this

* style
2025-09-30 17:42:10 +02:00
e99dee6470 Remove old sagemaker api support (#41161)
* fix

* fix
2025-09-30 17:41:52 +02:00
dded9fd112 [v5] More Training Args cleaning (#41131)
clean
2025-09-30 17:38:07 +02:00
6fb6117abe Revert "Fix DeepSpeed mixed precision precedence over Accelerate defaults" (#41124)
* Revert "Fix DeepSpeed mixed precision precedence over Accelerate defaults (#3…"

This reverts commit df67cd35f0ca1a1cbf7147b2576db31b16200cf4.

* fix
2025-09-30 17:37:42 +02:00
5bdb70450d Fix sliding window attn mask (#41228)
* Fix sliding window attn mask

* Clearer test

* Apply style fixes

* If Picasso made ascii drawings he would have made this

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-09-30 17:22:53 +02:00
a61fc6a0b9 Fix typing of train_args (#41142)
* Fix typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix fsdp typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-30 14:28:02 +00:00
919a4845fb Unify is_torchvision_v2_available with is_torchvision_available (#41227)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-30 15:21:49 +01:00
8e7b0655f1 update code owners (#41221)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-30 16:21:19 +02:00
2dd175e6bb Adapt to the SDPA interface to enable the NPU to call FlashAttentionScore (#41143)
Adapt to the SDPA interface to enable the NPU to call FlashAttentionScore.

Co-authored-by: frozenleaves <frozen@Mac.local>
2025-09-30 14:19:57 +00:00
cf0887f62c Remove old Python code (#41226)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-30 14:15:59 +00:00
52f5eca7c9 🚨 [v5] Remove headmasking (#41076)
* first attempt at removing

* copies

* last bits in core

* quick fixes

* tests purge

* docs and examples

* some fixes

* more

* another round of cleanups

* fix

* fix a bunch of models

* fix dummy bert

* fix

* fix new model

* fix signature change

* fix

* fix style/copies

* new models

* fix copies didnt find that damn

* test

* this shouldnt have happened during model addition
2025-09-30 16:04:57 +02:00
a80f05dfcb [generate] cache missing custom generate file (#41216)
* cache missing custom generate file

* make fixup
2025-09-30 13:32:24 +00:00
1f1e93e095 Align pull request template to bug report template (#41220)
The only difference is that I don't direct users to https://discuss.huggingface.co/ for hub issues.
2025-09-30 14:25:41 +02:00
2a596f5b2f [ESM] add accepts_loss_kwargs=False to EsmPreTrainedModel (#41006)
add accepts_loss_kwargs=False to EsmPreTrainedModel

Signed-off-by: Peter St. John <pstjohn@nvidia.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-30 12:06:47 +00:00
3edd8048b0 Trainer: Pass num_items_in_batch to compute_loss in prediction_step (#41183)
* Add num_items_in_batch computation to predict_step.

* address comments.

* Fix test cases.

* fixup

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-30 09:45:17 +00:00
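A sketch of the value the change above forwards during evaluation: the number of label tokens that actually contribute to the loss, so token-averaged losses match between training and prediction. The `-100` ignore index is the usual convention; the real Trainer plumbing is simplified here.

```python
import torch

def count_items_in_batch(labels: torch.Tensor, ignore_index: int = -100) -> int:
    # Tokens marked with the ignore index (padding, prompt tokens, ...) carry no loss.
    return int((labels != ignore_index).sum())

labels = torch.tensor([[5, 7, -100, -100], [2, -100, -100, -100]])
num_items_in_batch = count_items_in_batch(labels)
print(num_items_in_batch)  # 3
# In the Trainer, this value is then forwarded: compute_loss(model, inputs, num_items_in_batch=...)
```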
59035fd0e1 Avoid assumption that model has config attribute in deepspeed (#41207)
Avoid assumption that model has config in deepspeed
2025-09-30 11:42:50 +02:00
d97397787e Wait for main process in _save_checkpoint to ensure best checkpoint exists (#40923)
* Update trainer.py

* fix

* fix format

* move barrier, delete redundant
2025-09-30 11:41:03 +02:00
06c04e0851 Deprecate half_precision_backend (#41134)
* deprecate

* remove

* rm apex

* fix

* fix

* fix doc
2025-09-30 11:36:44 +02:00
0e5a975608 Fix Qwen3-Omni audio_token_id serialization issue (#41192)
Fix Qwen3-Omni audio_token_id serialization by overriding parent's attribute_map

- Override attribute_map in Qwen3OmniMoeThinkerConfig to prevent inheritance of incorrect mapping
- Parent class maps audio_token_id -> audio_token_index, but implementation uses audio_token_id directly
- Fixes issue where custom audio_token_id values were not preserved during save_pretrained/from_pretrained cycles

Fixes #41191
2025-09-30 11:15:56 +02:00
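A minimal sketch of the serialization fix described above: the child config clears the inherited `attribute_map` so `audio_token_id` is stored under its own name instead of being remapped. Class names, default values, and the attribute-mapping machinery are simplified stand-ins for the real config classes.

```python
class ParentConfigSketch:
    # The parent remaps audio_token_id -> audio_token_index on attribute access.
    attribute_map = {"audio_token_id": "audio_token_index"}

    def __init__(self, audio_token_index=151646):
        self.audio_token_index = audio_token_index

    def __setattr__(self, key, value):
        super().__setattr__(type(self).attribute_map.get(key, key), value)

    def __getattr__(self, key):
        mapped = type(self).attribute_map.get(key)
        if mapped is not None and mapped != key:
            return getattr(self, mapped)
        raise AttributeError(key)

class ThinkerConfigSketch(ParentConfigSketch):
    # Override: this implementation really does use `audio_token_id`, so stop remapping it.
    attribute_map = {}

    def __init__(self, audio_token_id=151646):
        self.audio_token_id = audio_token_id

parent = ParentConfigSketch()
parent.audio_token_id = 42
print(parent.audio_token_index)  # 42 -> the custom id was silently rewritten

cfg = ThinkerConfigSketch(audio_token_id=99)
print(cfg.audio_token_id)        # 99, kept under its own name so it survives save/load
```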
42c682514b docs/examples(speech): pin CTC commands to Hub datasets; add Windows notes (#41027)
* examples(speech): load Common Voice from Hub; remove deprecated dataset-script references (Windows-friendly notes)

* docs/examples(speech): pin CTC streaming & other CTC commands to Hub datasets; add Windows notes

* make style

* examples(speech): align DataTrainingArguments help with datasets docs; minor wording fixes

* docs/examples(speech): address review  remove Hub subsection & Whisper tip; align dataset help text

* style: apply ruff/black/usort/codespell on examples/speech-recognition

* Apply style fixes

* Update examples/pytorch/speech-recognition/README.md

* update doc to match load_dataset

---------

Co-authored-by: Eustache Le Bihan <eulebihan@gmail.com>
Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-09-30 08:38:31 +00:00
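The examples change above points the CTC commands at datasets hosted on the Hub instead of deprecated dataset scripts; a hedged illustration of the loading pattern is below. The repo id and language config are examples, not necessarily the exact ones used in the README, and gated Common Voice repos may require accepting their terms and logging in.

```python
from datasets import load_dataset

# Load a Hub-hosted Common Voice split instead of relying on a local dataset script.
common_voice = load_dataset(
    "mozilla-foundation/common_voice_11_0",  # example repo id on the Hub
    "tr",                                     # example language config
    split="train",
)
print(common_voice)
```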
aaf1269d83 Remove unnecessary Optional typing (#41198)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-30 08:38:05 +00:00
4a02bc7004 [docs] Fix links (#41110)
fix
2025-09-30 08:53:07 +02:00
def4a37e19 Embed interactive timeline in docs (#41015)
* embed timeline in docs (test web componentand Iframe)

* test scaling

* test multiple scales

* compensate scale in width

* set correct syle and scale

* remove bottom space created by scale

* add timeline as a separate page

* reformulate docs after review
2025-09-30 01:36:08 +00:00
3e975acc8b Fix docker quantization (#41201)
* launch docker

* remove gptq for now

* run tests

* Revert "run tests"

This reverts commit f85718ce3a21d5937bf7405b8925c125c67d1a3e.

* revert
2025-09-29 16:36:30 +00:00
8635d8e796 Fix 8bit bnb loading (#41200)
* Fix 8bit

* oups forgot the case where it is not prequantized
2025-09-29 18:34:46 +02:00
1f0e9a4778 Fix EXAONE-4.0 dummy id (#41089)
* Fix EXAONE-4.0 dummy id

* Fix exaone4 dummy (#1)

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-29 16:30:55 +00:00
bd37c45354 Add EdgeTAM (#39800)
* initial comment

* test

* initial conversion for outline

* intermediate commit for configuration

* chore:init files for sam2

* adding arbitary undefined config

* check

* add vision

* make style

* init sam2 base model

* Fix imports

* Linting

* chore:sam to sam2 classes

* Linting

* Add sam2 to models.__init__

* chore:match prompt encoder with sam2 code

* chore:prepare kwargs for mask decoder

* Add image/video predictors

* Add CUDA kernel

* Add output classes

* linting

* Add logging info

* tmp commit

* docs for sam2

* enable image processing

* check difference of original SAM2
- difference is the order of ToTensor()
- please see https://pytorch.org/vision/main/_modules/torchvision/transforms/functional.html#resize

* enable promptencoder of sam2

* fix promprencoder

* Confirmed that PromptEncoder is exactly same (Be aware of bfloat16 and float32 difference)

* Confirmed that ImageEncoder is exactly same (Be aware the linting of init)

* Confirmed that MaskDecoder is exactly same (TO DO: lint variable name)

* SamModel is now available (Need more chore for name)

* make fix-copies

* make style

* make CI happy

* Refactor VisionEncoder and PostioinEmbedding

* TO DO : fix the image_embeddings and sparse_embeddings part

* pure image inference done

* reusable features fix and make style

* styling

* refactor memoryattention

* tmp

* tmp

* refactor memoryencoder
TO DO : convert and inference the video pipeline

* TO DO : fix the image_encoder shape

* conversion finish
TO DO: need to check video inference

* make style

* remove video model

* lint

* change

* python utils/check_docstrings.py --check_all

* python utils/check_config_attributes.py

* remove copies for sam2promptencoder due to configuration

* change __init__.py

* remove tensorflow version

* fix that to not use direct comparison

* make style

* add missing import

* fix image_embedding_size

* refactor Sam2 Attention

* add fully working video inference (refactoring todo)

* clarify _prepare_memory_conditioned_features

* simplify modeling code, remove unused paths

* use one model

* use auto_docstring

* refactor rope embeddings

* nit

* not using multimask when several points given

* add all sam2.1

* add video tmp

* add Sam2VideoSessionState + fast image proc + video proc

* remove init_states from model

* fix batch inference

* add image integration tests

* uniformize modeling code with other sam models and use modular

* pass vision tests an most model tests

* All tests passing

* add offloading inference state and video to cpu

* fix inference from image embedding and existing mask

* fix multi_boxes mask inference

* Fix batch images + batch boxes inference

* improve processing for image inference

* add support for mask generation pipeline

* add support for get_connected_components post processing in mask generation

* add fast image processor sam, image processor tests and use modular for sam2 image processor

* fix mistake in sam after #39120

* fix init weights

* refactor convert

* add integration tests for video + other improvements

* add needed missing docstrings

* Improve docstrings and

* improve inference speed by avoiding cuda sync

* add test

* skip test for vision_model

* minor fix for vision_model

* fix vision_model by adding sam2model and change the torch dependencies

* remove patch_size

* remove image_embedding_size

* fix patch_size

* fix test

* make style

* Separate hieradet and vision encoder in sam2

* fixup

* review changes part 1

* remove MemoryEncoderConfig and MemoryAttentionConfig

* pass q_stride instead of q_pool module

* add inference on streamed videos

* explicitely process streamed frames

* nit

* Improve docstrings in Sam2Model

* update sam2 modeling with better gestion of inference state and cache, and separate Sam2Model and Sam2VideoModel

* improve video inference api

* change inference_state to inference_session

* use modular for Sam2Model

* fix convert sam2 hf

* modular

* Update src/transformers/models/sam2/video_processing_sam2.py

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* fix minor config

* fix attention loading error

* update modeling tests to use hub checkpoints

* Use CI A10 runner for integration tests values + higher tolerance for video integration tests

* PR review part 1

* fix doc

* nit improvements

* enforce one input format for points, labels and boxes

* nit

* last few nits from PR review

* fix style

* fix the input type

* fix docs

* add sam2 model as conversion script

* improve sam2 doc

* add rough necessarry changes

* first working edgetam

* fix issue with object pointers

* Use modular as much as possible

* nit fixes + optimization

* refactor spatial perceiver

* cleanup after merge

* add working edgetam

* improve perceiver resampler code

* simplify/unify rope attention logic

* Improve comments in apply_rotary_pos_emb_2d

* add working tests

* fix test timmwrapper

* add docs

* make fixup

* nits

* fix modular

* fix modular

* PR review part 1

* split apply_rotary_pos_emb_2d

* add granularity to _prepare_memory_conditioned_features

* add dates to doc

* add separate mlp for memory attention

* Fix memory on wrong device

* store processed frames in dict

* update checkpoints in tests

* update dates

---------

Co-authored-by: sangbumchoi <danielsejong55@gmail.com>
Co-authored-by: RUFFY-369 <prakarshkaushik369@gmail.com>
Co-authored-by: Sangbum Daniel Choi <34004152+SangbumChoi@users.noreply.github.com>
Co-authored-by: Haitham Khedr <haithamkhedr@meta.com>
Co-authored-by: sangbum choi <sangbumchoi@sangbumui-MacBookAir.local>
Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
2025-09-29 11:54:54 -04:00
c1db38686a [Kernels Attention] Change fallback logic to error out on explicit kernels request and include FA3 (#41010)
* fix

* be more strict

* change logic to include fa3

* fix the case where nothing is requested

* modify old tests + add kernels related tests

* style
2025-09-29 17:10:59 +02:00
5426edecab Make quantizers good citizens loading-wise (#41138)
* fix param_needs_quantization

* rewrite most hqq

* clean

* fix

* comment

* remove it from exception of safetensors

* start on bnb 4bits

* post-rebase fix

* make bnb4 bit a good citizen

* remove forgotten print

* make bnb 8bits a good citizen

* better hqq

* fix

* clean

* remove state dict from signature

* switch method

* make torchao a good citizen

* fixes

* fix torchao

* add check

* typo
2025-09-29 17:04:45 +02:00
399c589dfa Separate docker images for Nvidia and AMD in benchmarking (#41119)
Separate docker images for Nvidia and AMD
2025-09-29 17:03:27 +02:00
52cbc7c868 Fix attention sink implementation in flex attention (#41083)
* Fix attention sink implementation in flex attention

* fix dim

* fix

* Remove print

* raisae error when return_lse is False yet s_aux is providewd

* Clean test files for merge

* Update src/transformers/integrations/flex_attention.py

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* force return lse

* Add to doc

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2025-09-29 14:33:03 +00:00
de9a75f5b0 fix(trainer): Avoid moving model with device_map (#41032)
* fix(trainer): Avoid moving model with device_map

When a model is loaded with `device_map="auto"` and is too large to fit on a single GPU, `accelerate` will offload some layers to the CPU or disk. The `Trainer` would previously attempt to move the entire model to the specified device, causing a `RuntimeError` because a model dispatched with `accelerate` hooks cannot be moved.

This commit fixes the issue by adding a check in `_move_model_to_device` to see if the model has an `hf_device_map` attribute. If it does, the device placement is assumed to be handled by `accelerate`, and the `model.to(device)` call is skipped.

A regression test is added to ensure the `Trainer` can be initialized with a model that has a `hf_device_map` that simulates offloading without raising an error.

* Added the logger warning for the move model

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
2025-09-29 14:31:42 +00:00
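A sketch of the guard described in the fix above; the method name matches the description, but the real Trainer logic has more branches around device placement.

```python
import logging

logger = logging.getLogger(__name__)

def _move_model_to_device(model, device):
    # Models dispatched by accelerate (device_map="auto") carry an hf_device_map and may
    # have layers offloaded to CPU/disk; calling .to(device) on them raises a RuntimeError.
    if getattr(model, "hf_device_map", None) is not None:
        logger.warning(
            "Model is already dispatched by accelerate (hf_device_map is set); skipping .to(device)."
        )
        return model
    return model.to(device)
```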
bcc0dae77c enable flex attention ut cases on XPU (#40989)
* enable flex attention ut cases on XPU

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-29 14:30:49 +00:00
fcd483f0ff Bump hfh prerelease version (#41175) 2025-09-29 16:28:36 +02:00
a3fa1d3993 Fix inaccurate train_tokens_per_second when resuming from checkpoint (#41156)
* fix(trainer): Fix the issue of inaccurate token count in training sessions

During the training process, the initial token count was not saved, leading to inaccurate speed calculation. Now, the initial token count is saved and the increment during the session is calculated, ensuring that the speed metric accurately reflects the performance of the current training session.

* Fix the error

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-29 16:22:35 +02:00
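A sketch of the bookkeeping described above: record the token counter at (re)start and report throughput from the delta, so a resumed run is not credited with tokens processed in earlier sessions. Field and class names are illustrative, not the actual Trainer state.

```python
import time

class TokenSpeedTracker:
    def __init__(self, initial_num_tokens: int = 0):
        # Token count restored from the checkpoint when resuming; 0 for a fresh run.
        self.session_start_tokens = initial_num_tokens
        self.num_tokens = initial_num_tokens
        self.session_start_time = time.time()

    def update(self, new_tokens: int) -> None:
        self.num_tokens += new_tokens

    def train_tokens_per_second(self) -> float:
        elapsed = max(time.time() - self.session_start_time, 1e-8)
        # Only tokens seen in *this* session count toward the speed metric.
        return (self.num_tokens - self.session_start_tokens) / elapsed

tracker = TokenSpeedTracker(initial_num_tokens=1_000_000)  # resumed from a checkpoint
tracker.update(2048)
print(round(tracker.train_tokens_per_second()))
```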
ad74fba085 [v5] Remove model_parallel deprecated feature (#41166)
* fix

* remove model parallel

* style

* removed a bit too much

* rm comments

* fix
2025-09-29 16:14:03 +02:00
38a08b6e8a More typing fixes (#41102)
* Fix noqa

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* fix typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* remove noqa

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix chars

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-29 13:11:53 +00:00
4fade1148f [tests] CausalLMTester automatically infers other test classes from base_model_class 🐛 🔫 (#41066)
* halfway through the models

* update test checks

* refactor all

* another one

* use tuples

* more deletions

* solve bad inheritance patterns

* type

* PR ready?

* automatic model class inference from the base class

* vaultgemma

* make fixup

* make fixup

* rebase with gpt2

* make fixup :'(

* gpt2 is special
2025-09-29 15:05:08 +02:00
cdba28c344 [XPU] Add MXFP4 support for XPU (#41117)
* XPU supports gpt-oss MXFP4

* Complete MXFP4 UT file and comment information

* Complete MXFP4 UT file and comment information

* Fix code style

* Fix code style

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-29 12:10:41 +02:00
2dcb20dcec CI Runners - move amd runners mi355 and 325 to runner group (#41193)
* Update CI workflows to use devmi355 branch

* Add workflow trigger for AMD scheduled CI caller

* Remove unnecessary blank line in workflow YAML

* Add trigger for workflow_run on main branch

* Update workflow references from devmi355 to main

* Change runner_scale_set to runner_group in CI config
2025-09-29 11:14:19 +02:00
d0d574b1e4 Modernbert fix (#41056)
* Add FA to docker

* Fixed padding for mdernbert

* Fixed logits and hidden states extraction in ModernBertForMultipleChoice

* Added a test for ModernBertForMultipleChoice

* fixes

* More fixes and GREEN CI

* consistency

* moar consistency
2025-09-29 10:52:44 +02:00
071eb5334f handle flash slow tests (#41072)
* handle flash slow tests

* update patch mask to 1/0 for flash

* don't skip flash

* flash

* raise tols

* rm flash support :(

* nits

---------

Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-173-7.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-171-230.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-95.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-214.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-147.ec2.internal>
2025-09-26 16:24:31 +00:00
50d2448a1a Enable fa in amd docker (#41069)
* Add FA to docker

* Use caching mechanism for qwen2_5

* Fix a typo in important models list

* Partial fixes for gemma3

* Added a commit ID for FA repo

* Detailled  the expectation storage format

* Rebase fix

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-09-26 13:57:58 +02:00
10f6891fc5 Remove data from examples (#41168)
Remove telemetry
2025-09-26 13:52:45 +02:00
97ca0b4712 Fix flash-attn for paged_attention when no kernels (#41078)
* Fix non-kernels flash attention paged implementation

* Cover all cases

* Style

* Update src/transformers/integrations/flash_paged.py

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* Apply style fixes

---------

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-09-26 10:41:21 +02:00
53838edde7 Improve add_dates script (#41167)
* utils/add_dates.py

* put lfm2-vl in correct category
2025-09-25 16:00:05 -04:00
449533af73 Add language specifiers to code blocks of markdown files (#41114)
* Add language specifiers to code blocks of markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update docs/source/en/model_doc/qwen3_omni_moe.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/chat_templating_writing.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/chat_templating_writing.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/chat_templating_writing.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update nemotron.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update phimoe.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update README.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Fix syntax error

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-25 10:29:57 -07:00
e691f84412 Force new vision models addition to include a fast image processor (#40802)
* add test

* fix test and change cutoff date

* Add documentation to test
2025-09-25 15:58:18 +00:00
e54bb62a73 Simplify and improve model loading logic (#41103)
* remove unexpected keys from inputs (they have nothing to do there)

* remove input

* simplify a lot init

* fix

* fix check for non-persistent buffer

* revert because too many old and bad models...

* remove comment

* type hint

* make it a real test

* remove model_to_load -> always use the same model

* typo

* remove legacy offload_folder (we never waste that memory anymore)

* do not change prefix anymore

* change very bad function name

* create adjust method

* remove useless method

* restrict

* BC

* remove unused method

* CI

* remove unused args

* small fix

* fix

* CI

* CI

* avoid too many loops

* fix regex

* cleaner

* typo

* fix

* fix
2025-09-25 17:28:27 +02:00
6dc9ed87a0 Fix format of compressed_tensors.md (#41155)
* Fix table format

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix format

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-25 14:50:15 +00:00
a579de7f5e Add Parakeet (#39062)
* first commit

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* update to handle masking for bs>1

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* Add tests and docs

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* update model ids

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* update docs and improve style

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* update librosa location

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* import guard torch too

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* ruff code checks fix

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* ruff format check

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* updated to parakeet names

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* update script

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* Add tokenizer decoding

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* Remove other model dependency

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* clean tests

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* fix tests

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* linting

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* fix ruff lint warnings

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* move to separate folders

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* add parakeet ctc model code

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* simplify encoder structure

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* update documentation

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* add parakeet to toctree

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* fix tests

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* add parakeet doc

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* Address comments

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* Update featurizer to compute lens directly

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* fix ruff tests

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* fix encoding format

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* fix minor ctc decoding

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>

* revert modular_model_converter.py changes

* revert check_config_attributes.py changes

* refactor: fastconformer & parakeet_ctc -> parakeet

* modeling update

* test update

* propagate feature extractor updates

* propagate doc changes

* propagate doc changes

* propagate tokenization changes

* propagate conversion changes

* remove fastconformer tests

* remove modular

* update processor

* update processor

* tset update

* diverse fixes

* 100% matching greedy batched

* Update conversion script.

* Refactor docs.

* Refactor auto loading.

* Refactor and fix tokenization and processing.

* Update integration test.

* Modeling fixes:
- ensure correct attention mask shape
- ensure layer drop returns valid output
- correct blank token ID when computing CTC loss

* Format and repo consistency.

* Update model doc.

* Fix feature extraction tests.

* Fix (most) tokenizer tests.

* Add pipeline example.

* Fixes

* Use eager_attention_forward from Llama.

* Small tweaks.

* Replace Sequential with ModuleList

* Add check if not all layers copied

* Clean tokenizer.

* Standardize FastSpeech2ConformerConvolutionModule for Parakeet.

* Switch to modular for modeling and processing.

* Add processor tests.

* Fix modeling tests.

* Formating and docstrings.

* Add `return_attention_mask` like other feature extractors.

* clean up after merging main.

* nits on modeling

* configuration update

* nit

* simplification: use PretrainedTokenizerFast, simplify processor

* add dtype arg to mel_filter_bank

* feature extraction: simplify!

* modeling update

* change to ParakeetTokenizerFast

* correct attention mask handling

* auto update

* proc update

* test update

* feature extraction fixes

* modeling update

* conversion script update

* udpate tests feature integration

* update tokenization and tests

* processor tests

* revert audio_utils

* config docstring update

* blank_token -> pad_token

* modeling udpate

* doc update

* fix tests

* fix test

* fix tests

* address review comments

* add comment

* add comment

* explicitly not support flash

* attention straightforward masking

* fix

* tokenizer update: skipping blank tokens by default

* doc update

* fix max_positions_embeddings handling

* nits

* change atol feature extraction integration tests

* doc update + fix loss

* doc update

* nit

* update integration test for A10

* repo id name

* nit

---------

Signed-off-by: nithinraok <nithinrao.koluguri@gmail.com>
Co-authored-by: Eustache Le Bihan <eulebihan@gmail.com>
Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
Co-authored-by: Eric B <ebezzam@gmail.com>
2025-09-25 13:52:24 +00:00
1dd22a234c extend gemma3n integration ut cases on XPU (#41071)
Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
2025-09-25 13:46:37 +00:00
05fb90c969 Fix single quotes in markdown (#41154)
Fix typos

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-25 13:03:26 +00:00
44682e7131 Adapt and test huggingface_hub v1.0.0 (#40889)
* Adapt and test huggingface_hub v1.0.0.rc0

* forgot to bump hfh

* bump

* code quality

* code quality

* relax dependency table

* fix has_file

* install hfh 1.0.0.rc0 in circle ci jobs

* repository

* push to hub now returns a commit url

* catch HfHubHTTPError

* check commit on branch

* add it back

* fix ?

* remove deprecated test

* uncomment another test

* trigger

* no proxies

* many more small changes

* fix load PIL Image from httpx

* require 1.0.0.rc0

* fix mocked tests

* fix others

* unchange

* unchange

* args

* Update .circleci/config.yml

* Bump to 1.0.0.rc1

* bump kernels version

* fix deps
2025-09-25 11:13:50 +00:00
750dd2a401 Fix: align Qwen2.5-VL inference rope index with training by passing s… (#41153)
Fix: align Qwen2.5-VL inference rope index with training by passing second_per_grid_ts
2025-09-25 10:33:46 +00:00
7258ea44bc Fix loading logic flaw with regards to unexpected and missing keys (#40850)
* Unexpected keys should be ignored at load with device map

* remove them all

* fix logic flaw

* fix

* simplify

* style

* fix

* revert caching allocator change

* add other test

* add nice doc

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-09-24 16:44:42 +02:00
2c4caa19e7 dummy commit (#41133)
* dummy commit, nothing interesting

* dummy commit, nothing interesting

* dummy commit, nothing interesting

* dummy commit, nothing interesting

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-24 16:31:46 +02:00
6d1875924c Fixed loading LongT5 from legacy checkpoints (#40724)
* Fixed loading LongT5 from legacy checkpoints

* Adapted the fix to work with missing lm_head
2025-09-24 13:13:18 +01:00
3ca43d34b1 Fixed MXFP4 model storage issue (#41118) 2025-09-24 12:11:51 +00:00
b33cb70097 🚨Refactor: Update text2text generation pipelines to use max_new_tokens… (#40928)
* Refactor: Update text2text generation pipelines to use max_new_tokens and resolve max_length warning

* docs(text2text_generation): update the parameter comments to reflect modern generation practice

Update the max_length parameter comment to max_new_tokens, matching the modern practice of specifying the number of new tokens to generate

* refactor(text2text_generation): Remove outdated input validation logic

* docs(text2text_generation): Revert incorrectly modified comment

* docs(text2text_generation): Revert incorrectly modified comment
2025-09-24 11:54:55 +00:00
b0c7034d58 Remove self-assignment (#41062)
* Remove self-assignment

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update src/transformers/integrations/flash_paged.py

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>

* Clear pass

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Clear pass

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Clear pass

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
2025-09-24 12:43:17 +01:00
04a0bb569c Fix broken `` expressions in markdown files (#41113)
Fix broken expressions in markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-24 11:34:12 +00:00
071c7b1423 Fix the error where a keyword argument appearing before *args (#41099)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-24 11:27:37 +00:00
80f20e0ff8 [Qwen3-next] Fix dimension mismatch in torch_chunk_gated_delta_rule and torch_recurrent_gated_delta_rule (#40963) (#41036)
* fix mismatched dims for qwen3 next

* propagate changes

* chore: renamed tot_heads to total_sequence_length

* Apply suggestion from @vasqu

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* minor fix to modular qwen3 next file

---------

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
2025-09-24 11:18:27 +00:00
1d81247b0c [torchao safetensors] integrate torchao safetensors support with transformers (#40735)
* enable torchao safetensors

* enable torchao safetensors support

* add more version checking
2025-09-24 12:32:47 +02:00
b533cec74d Support loading LFM2 GGUF (#41111)
* add gguf config mapping for lfm2

* add lfm2 tensor process to unsqueeze conv weights

* adjust values from gguf config to HF config

* add test for lfm2 gguf

* ruff

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-24 10:17:41 +00:00
65dcd66cc8 🚨 [V5] Remove deprecated training arguments (#41017)
* Remove deprecated training arguments from V5

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Remove deprecated training arguments from V5

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix comments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-24 12:01:27 +02:00
43a613c8da Update ruff to 0.13.1 + target Python 3.10 + apply fixes (#37809)
Update ruff to 0.13.1, target it to Python 3.10, and apply its fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-09-24 06:37:21 +00:00
f64354e89a Format empty lines and white space in markdown files. (#41100)
* Remove additional white space and empty lines from markdown files

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add empty lines around code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-23 16:20:01 -07:00
99b0995138 Remove bad test skips (#41109)
* remove bad skips

* remove more

* fix inits
2025-09-23 20:39:28 +02:00
00f3d90720 Fix _get_test_info for inherited tests (#41106)
* fix _get_test_info

* fix patched

* add comment

* ruff

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-23 19:35:24 +02:00
cfa022e719 [tests] gpt2 + CausalLMModelTester (#41003)
* tmp commit

* tmp commit

* tmp commit

* rm old GPT2ModelTester

* nit bug

* add facilities for encoder-decoder tests; add comments on ALL overwrites/extra fns

* vision_encoder_decoder
2025-09-23 18:07:06 +01:00
869735d37d 🚨 [generate] update paligemma mask updates (and other assisted generation-related fixes) (#40917)
* tmp

* fix modular inheritance

* nit

* paligemma 1 doesn't have swa

* use same pattern as in models with hybrid layers

* PR comments

* helium also needs layer_typed (bc it relies on gemma)

* paligemma/gemma3: same mask creation fn in fwd and generate

* propagate changes to helium (gemma-based)

* tmp commit

* slow paligemma tests passing, let's see what breaks

* fix test_left_padding_compatibility

* tmp commit

* tmp commit

* rebase error

* docs

* reduce diff

* like this?

* t5gemma

* better comment

* shorter diff

* exception

* ffs type

* optional

* shorter modular_gemma.py

* helium model actually needs no changes -- the tester is the issue

* t5gemma modular config

* a few more modular; paligemma BC

* fix processor issues?

* rm config exception

* lift warning in gemma
2025-09-23 16:20:00 +00:00
71717ce91c docs: Fix Tool Use links and remove dead RAG links (#41104)
docs: Fix tool use links. Remove dead RAG links. Fix style
2025-09-23 09:18:49 -07:00
946e5f95ea fix wrong height and width when read video use torchvision (#41091) 2025-09-23 12:35:44 +00:00
870add3daf Remove tf and flax from Chinese documentation (#41057)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-23 11:43:17 +00:00
ae60692821 Remove unused arguments (#40916)
* Fix unused arguments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-23 11:40:51 +00:00
f682797866 Fix typing (#40788)
* Fix optional typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix optional typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix schema typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typing

* Fix typing

* Fix typing

* Fix typing

* Use np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Format code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Use np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix quote string of np.ndarray

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix code

* Format

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-23 11:36:02 +00:00
f4a6c65951 Fix typos in documentation (#41087)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-23 11:27:04 +00:00
89e0f472f4 Remove mention of TensorFlow/Flax/JAX from English documentation (#41058)
Remove mention of TensorFlow from English documentation

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-23 11:14:11 +00:00
62ce6fcb60 Fix argument name in benchmarking script (#41086)
* Fix argument name in benchmarking script

* Adjust vars
2025-09-23 13:05:27 +02:00
257fe5eea8 Switch to python:3.10-slim for CircleCI docker images (#41067)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-23 12:48:48 +02:00
0ec0325781 Minor addition, no split modules for VideoMAEE (#41051)
* added no split modules

* fixed typo

---------

Co-authored-by: Raushan Turganbay <raushan@huggingface.co>
2025-09-23 11:53:51 +02:00
577fa6f167 fix crash when using chat to send 2+ request to gptoss (#40536)
Signed-off-by: Wang, Yi <yi.a.wang@intel.com>
2025-09-23 09:50:23 +00:00
03c92884b5 Update team member list for some CI workflows (#41094)
* update list

* update list

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-23 09:48:40 +00:00
cbb290ec23 Improve documentation and errors in Mamba2-based models (#41063)
* fix bug in Mamba2 docs

* correct 'because on of' issue

* link to other Mamba2 model types

* github URL is not changed

* update error message in generated files
2025-09-22 10:36:20 -07:00
8048c614bf [i18n-bn] Add Bengali language README file (#40935)
* [i18n-bn] Add Bengali language README file and update links in existing language files

* Update Bengali README for clarity and consistency in model descriptions
2025-09-22 09:51:39 -07:00
aa30e0642e Update quantization CI (#41068)
* fix

* new everything

* fix
2025-09-22 18:10:16 +02:00
1bb69cce82 Fix CI jobs being all red 🔴 (false positive) (#41059)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-22 16:51:00 +02:00
f15258dec2 Remove <frameworkcontent> and <pt> tags from documentation (#41055)
* Remove <frameworkcontent> and <pt> tags

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert changes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Update docs/source/en/model_doc/madlad-400.md

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
2025-09-22 14:29:50 +00:00
2ec37649e2 Ci utils (#40978)
* Add CI reports dir to gitignore

* Add utils to run local CI

* Review compliance

* Style

* License
2025-09-22 16:16:19 +02:00
b9d337b6f3 Add write token for uploading benchmark results to the Hub (#41047)
* Separate write token for Hub upload

* Address review comments

* Address review comments
2025-09-22 14:13:46 +00:00
646ff51d1a Simplify unnecessary Optional typing (#40839)
Remove Optional

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 12:57:50 +00:00
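For readers unfamiliar with the pattern being removed, a minimal sketch of what "unnecessary Optional" usually means (hypothetical code, not the actual diff):

```python
from typing import Optional

# Before: Optional[...] even though None is never an accepted value
def resize_before(size: Optional[int] = 224) -> int:
    return size * 2

# After: Optional is reserved for parameters whose default really is None
def resize_after(size: int = 224) -> int:
    return size * 2
```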
c9939b3ab6 Remove repeated import (#40937)
* Remove repeated import

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix conflict

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 12:57:13 +00:00
4f36011545 [testing] Fix seed_oss (#41052)
* fix

* fix

* fix

* fix

* fix

* fix

* Update tests/models/seed_oss/test_modeling_seed_oss.py

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
2025-09-22 14:54:30 +02:00
2b8a7e82b5 Add Whole Word Masking and Padding Strategy to DataCollatorForLanguageModeling (#39485)
* Add whole word masking

* Vectorize whole word masking functions

* Unit test whole word masking

* Remove support for TF in whole word masking
2025-09-22 13:42:34 +01:00
226667ec2f Remove doc of tf and flax (#41029)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 13:42:26 +01:00
6eff44bb8d Fix outdated torch version check (#40925)
Update torch minimum version check to 2.2

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 12:38:07 +00:00
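A minimal sketch of such a minimum-version guard, assuming the packaging library is available; the helper actually used inside transformers may differ:

```python
import torch
from packaging import version

MIN_TORCH = version.parse("2.2")  # the new floor mentioned in the entry above

if version.parse(torch.__version__) < MIN_TORCH:
    raise ImportError(
        f"torch >= {MIN_TORCH} is required, found {torch.__version__}"
    )
```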
9ff47a71e4 Fix condition for emitting warning when generation exceeds max model length (#40775)
correct warning when generation exceeds max model length

Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>
2025-09-22 12:21:38 +00:00
ae9ef2e151 docs: improved RoPE function Docstrings (#41004)
* docs: improved RoPE function docstrings


* Update src/transformers/modeling_rope_utils.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
2025-09-22 13:21:15 +01:00
f3c481ed87 Use torch.autocast (#40975)
* Use torch.autocast

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Format code

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 12:18:24 +00:00
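For context, the device-agnostic context manager the commit switches to, shown here in a minimal CPU sketch (torch.autocast subsumes the older torch.cuda.amp.autocast):

```python
import torch

model = torch.nn.Linear(16, 16)
x = torch.randn(4, 16)

# Ops inside the block run in the lower-precision dtype where it is safe to do so.
with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    y = model(x)

print(y.dtype)  # torch.bfloat16
```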
37152f8446 Fix typos in English/Chinese documentation (#41031)
* Fix typos and formatting in English docs

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix typos and formatting in Chinese docs

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 11:31:46 +00:00
8a52288dba Remove optax (#41030)
Remove optax dep

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 11:30:39 +00:00
5f891b36cd Fix typing of tuples (#41028)
* Fix tuple typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-22 11:29:07 +00:00
c05f9d2f0e [testing] Fix qwen2_audio (#41018)
* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-22 10:45:31 +00:00
55a1eaf6f0 Fix Qwen video tests (#41049)
fix test
2025-09-22 12:28:11 +02:00
db802aafa4 Modify Qwen3Omni parameter name since VL changed it (#41045)
Modify parameter name since VL changed it

Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>
2025-09-22 10:06:59 +00:00
8a2f24a321 Making compute_loss_func always take priority in Trainer (#40632)
* logger warn, if-else logic improved

* redundant if condition fix
2025-09-22 09:47:34 +00:00
ebbcf00ad1 Adding support for Qwen3Omni (#41025)
* Add Qwen3Omni

* make fix-copies, import properly

* nit

* fix wrong setup. Why was audio_token_id renamed?

* upds

* more processing fixes

* yup

* fix more generation tests

* down to 1?

* fix import issue

* style, update check repo

* up

* fix quality at my best

* final quality?

* fix doc building

* FINAL COMMIT: SKIP IMPORTANT BUT FAILING TESTS FOR MERGE

* SKIP THE TEMPLATE ONE

---------

Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>
Co-authored-by: Arthur <arthur.zucker@gmail.com>
2025-09-21 23:46:27 +02:00
67097bf340 Fix benchmark runner argument name (#41012) 2025-09-20 10:53:56 +02:00
8076e755e5 Update after #41007 (#41014)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-19 21:55:46 +02:00
022c882e14 Fix Glm4v test (#41011)
fix
2025-09-19 18:54:26 +02:00
966b3dbcbe Fix PhimoeIntegrationTest (#41007)
* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-19 16:43:46 +00:00
04bf4112f2 🚨 [lightglue] fix: matches order changed because of early stopped indices (#40859)
* fix: bug that made early stop change order of matches

* fix: applied code suggestion

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>

* fix: applied code suggestion to modular

* fix: integration tests

---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
2025-09-19 16:41:22 +01:00
dfc230389c 🚨 [v5] remove deprecated entry point (#40997)
* remove old entry point

* update references to transformers-cli
2025-09-19 14:40:27 +00:00
8010f5d1d9 Patch more unittest.case.TestCase.assertXXX methods (#41008)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-19 16:38:12 +02:00
5bf633b32a [tests] update test_left_padding_compatibility (and minimize overwrites) (#40980)
* update test (and overwrites)

* better test comment

* 0 as a default for
2025-09-19 15:36:26 +01:00
df12617914 🚨 [v5] remove generate output retrocompatibility aliases (#40998)
remove old type aliases
2025-09-19 14:36:12 +00:00
2a538b2ed4 fix dict like init for ModelOutput (#41002)
* fix dict like init

* style
2025-09-19 16:14:44 +02:00
96a3e898cd RUFF fix on CI scripts (#40805)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-19 13:50:26 +00:00
98c8523434 Fix more dates in model cards and wrong modalities in _toctree.yml (#40955)
* Fix model cards and modalities in toctree

* fix new models
2025-09-19 09:47:28 -04:00
767f8a4c75 Fix typos in src and tests (#40845)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-19 13:18:38 +00:00
9d9c4d24c5 Make EfficientLoFTRModelTest faster (#41000)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-19 12:51:05 +00:00
b4ba4e1da0 [RMSNorm] Fix rms norm init for models that center around 1 (#40796)
* fix

* fixup inits

* oops

* fixup gemma

* fixup modular order

* how does this keep happen lol

* vaultgemma is new i forgot

* remove init check
2025-09-19 12:15:36 +00:00
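To illustrate why the init matters, an illustrative sketch (not the transformers implementation): some models scale by the weight directly, while Gemma-style models scale by (1 + weight), so the "identity" init is ones in the first case and zeros in the second:

```python
import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6, center_around_one: bool = False):
        super().__init__()
        self.eps = eps
        self.center_around_one = center_around_one
        # Identity behaviour at init: ones for plain scaling, zeros for (1 + weight).
        self.weight = nn.Parameter(torch.zeros(dim) if center_around_one else torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        norm = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
        scale = (1 + self.weight) if self.center_around_one else self.weight
        return norm * scale
```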
fce746512b [docs] rm stray tf/flax autodocs references (#40999)
rm tf references
2025-09-19 12:04:12 +01:00
ddfa3d4402 blt wip (#38579)
* blt wip

* cpu version

* cpu friendly with full entropy model (real time patching)

* adding config file instead of args file

* enable MPS

* refactoring unused code

* single config class in config file

* inherit from PreTrainedModel

* refactor LMTransformer --> BLTPatcher

* add conversion script

* load from new checkpoint with from_pretrained

* fixed demo from_pretrained

* clean up

* clean a few comments

* cleanup folder

* clean up dir

* cleaned up modeling further

* rename classes

* adding transformers Attention class and RotaryEmbedding class

* exchanged blt modules for transformers modules: attention, rotary_emb, create_causal_mask, etc

* separate out patcher config, update modeling and conversion script

* rename vars to be more transformers-like

* rm unused functions

* adding cross attention from transformers

* pass arg

* rename weights

* updated conversion script

* overwritten commit! fixing PR

* apply feedback

* adding BLTRMSNorm like Llama

* add repeat_kv and eager_attention_forward copied from

* BLTMLP identical to MllamaTextMLP

* clean up some args

* more like mllama, but busier inits

* BLTTransformerLayer config

* decoder, encoder, global configs

* wip working on modular file

* cleaning up patch and configs

* clean up patcher helpers

* clean up patcher helpers further

* clean up

* some config renaming

* clean up unused configs

* clean up configs

* clean up configs

* update modular

* clean

* update demo

* config more like mllama, separated subconfigs from subdicts

* read from config instead of self args

* update demo file

* model weights to causal lm weights

* missed file

* added tied weights keys

* BLTForCausalLM

* adding files after add-new-model-like

* update demo

* working on tests

* first running integration tests

* added integration tests

* adding tokenization tests, integration tests, and cleaned up tokenization file, + ruff

* tokenizer clean up

* modular file

* fixing rebase

* ruff

* adding correct basemodel output and updating config with checkpoint vals (for testing)

* BLTModelTests git status

* enabling inputs_embeds, although won't be equal to input_ids since need ids for patching logic

* fix sdpa == causal tests

* fix small model test and some gradient checkpointing

* skip training GC tests

* fix test

* updated modular

* update modular

* ruff

* adding modular + modeling

* modular

* more modern is_causal check

* cleaning up modular

* more modular reduction

* ruff

* modular fix

* fix styling

* return 2

* return 2

* fix some tests

* fix bltcrossattention after modular break

* some fixes / feedback

* try cache generate fix

* try cache generate fix

* fix generate tests

* attn_impl workaround

* refactoring to use recent TransformersKwargs changes

* fix hidden_states shape test

* refactor to new outputs

* simplify outputs a bit

* rm unneeded decoderlayer overwriting

* rename blt

* forgot tokenizer test renamed

* Reorder

* Reorder

* working on modular

* updates from modular

* new modular

* ruff and such

* update pretrainedmodel modular

* using cohere2 apply_rotary_pos_emb

* small changes

* apply feedback r2

* fix cross_attention

* apply more feedback

* update modeling fix

* load submodules from pretrainedmodel

* set initializer_range to subconfigs

* rm cross_attention_states pass when not needed

* add 7b projection layer support

* check repo

* make copies

* lost cohere2 rotate_half

* ruff

* copies?

* don't tie weights for submodules

* tie weights setting

* check docstrings

* apply feedback

* rebase

* rebased modeling

* update docs

* applying feedback

* few more fixes

* fix can_record_outputs

* fast tokenizer

* no more modulelist

* tok auto

* rm tokenizersss

* fix docs

* ruff

* fix after rebase

* fix test, configs are not subscriptable

---------

Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-30.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-103.ec2.internal>
Co-authored-by: Lysandre <hi@lysand.re>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-36.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-45.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-173-121.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-103.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-178.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-79.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-169-239.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-111.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-100.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-153.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-15.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-165-131.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-138.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-215.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-142.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-147.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-0.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-58.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-165-202.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-244.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-174-186.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-192.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-14.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-171-249.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-164-75.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-161-78.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-163-134.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-162-180.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-175-241.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-160-225.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-9.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-34.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-166-68.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-167-175.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-170-160.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-168-95.ec2.internal>
Co-authored-by: ita.zaporozhets@huggingface.co <ita_zaporozhets@ip-26-0-172-73.ec2.internal>
2025-09-19 11:55:55 +02:00
46ea7e613d [testing] test num_hidden_layers being small in model tester (#40992)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-19 11:45:07 +02:00
ebdc17b8e5 ENH: Enable readline support for transformers chat (#40911)
ENH Enable readline support for chat

This small change enables GNU readline support for the transformers chat
command. This includes, among others:

- advanced navigation and editing: ctrl + a ctrl + e alt + b alt + f
  ctrl + k alt + d etc.
- navigate and search history: arrow up/down ctrl + p ctrl + n  ctrl + r
- undo: ctrl + _
- clear screen: ctrl + l

Implementation

Although it may look strange, just importing readline is enough to
enable it in Python, see:

https://docs.python.org/3/library/functions.html#input

As readline is not available on some
platforms (https://docs.python.org/3/library/readline.html), the import
is guarded.

Readline should work on Linux, MacOS, and with WSL, I'm not sure about
Windows though. Ideally, someone can give it a try. It's possible that
Windows users would have to install
pyreadline (https://pypi.org/project/pyreadline3/).
2025-09-19 10:39:21 +01:00
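A minimal sketch of the guarded import described in the entry above; the module names are real, everything else is illustrative:

```python
# Importing readline is enough for input() to gain line editing and history;
# the import is guarded because readline is not available on every platform.
try:
    import readline  # noqa: F401
except ImportError:
    pass  # e.g. on Windows without pyreadline3

while True:
    line = input(">>> ")  # arrow keys, Ctrl+R history search, etc. now work
    if line in {"exit", "quit"}:
        break
    print(line)
```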
e2dbde280f Remove [[autodoc]] refs to TF/Flax objects (#40996)
* remove refs

* more
2025-09-19 11:28:34 +02:00
155f7e2e62 🔴[Attention] Bert-based Models Attention Refactor (#38301)
* clean start to bert refactor

* some test fixes

* style

* fix last tests

* be strict on positional embeddings, fixup according tests

* cache support

* more cache fixes, new causal API

* simplify masks, fix tests for gen

* flex attn, static cache support, round of fixes

* ?

* this time

* style

* fix flash attention tests, flex attention requires torch 2.7.x to work with multiple classes (as recompile strats force a size call which is wrongly interpreted before)

* roberta

* fixup sdpa remains

* attention split, simplify args and kwargs, better typing

* fix encoder decoder

* fix test

* modular roberta

* albert

* data2vectext, making it modular tomorrow

* modular data2vec text

* tmp disable

* xmod + cache position fixes

* whoops

* electra + markuplm, small fixes

* remove wrong copy

* xlm_roberta + some embedding fixes

* roberta prelayernorm

* RemBert: remove copy, maybe doing it later

* ernie

* fix roberta offloading

* camembert

* copy fixes

* bert generation + fixes on eager

* xlm roberta xl

* bridgetower (text) + seamlessv2 copy fixes

* rocbert + small fixes

* whoops

* small round of fixups

* NOTE: kernels didn't load with an earlier version, some fixup (needs another look bc cross deps)

* the end of the tunnel?

* fixup nllbmoe + style

* we dont need this anymore

* megatron bert is barely used, low prio skip for now

* Modernize bert (template for others)

NOTE: trying to push this through, might be overdue if not in time possible

* check inputs for all others (if checkmarked)

* fix bridgetower

* style

* fix encoder decoder (partially but cause found and fix also, just needs to be done for everything else)

* proper fix for bert to force intermediate dict outputs

* propagate to others

* style

* xlm roberta xl investigation, it's the layernorm...

* mobile bert

* revert this, might cause issues with composed models

* review

* style
2025-09-19 11:23:58 +02:00
61eff450d3 Benchmarking v2 GH workflows (#40716)
* WIP benchmark v2 workflow

* Container was missing

* Change to sandbox branch name

* Wrong place for image name

* Variable declarations

* Remove references to file logging

* Remove unnecessary step

* Fix deps install

* Syntax

* Add workdir

* Add upload feature

* typo

* No need for hf_transfer

* Pass in runner

* Runner config

* Runner config

* Runner config

* Runner config

* Runner config

* mi325 caller

* Name workflow runs properly

* Copy-paste error

* Add final repo IDs and schedule

* Review comments

* Remove wf params

* Remove parametrization from workflow files

* Fix callers

* Change push trigger to pull_request + label

* Add back schedule event

* Push to the same dataset

* Simplify parameter description
2025-09-19 08:54:49 +00:00
5f6e278a51 Remove set_model_tester_for_less_flaky_tests (#40982)
remove
2025-09-18 18:56:10 +02:00
4df2529d79 🚨🚨🚨 Fully remove Tensorflow and Jax support library-wide (#40760)
* setup

* start the purge

* continue the purge

* more and more

* more

* continue the quest: remove loading tf/jax checkpoints

* style

* fix configs

* oups forgot conflict

* continue

* still grinding

* always more

* in the zone

* never stop

* should fix doc

* fic

* fix

* fix

* fix tests

* still tests

* fix non-deterministic

* style

* remove last rebase issues

* onnx configs

* still on the grind

* always more references

* nearly the end

* could it really be the end?

* small fix

* add converters back

* post rebase

* latest qwen

* add back all converters

* explicitly add functions in converters

* re-add
2025-09-18 18:27:39 +02:00
5ac3c5171a Track the CI (model) jobs that don't produce test output files (process being killed etc.) (#40981)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-18 18:27:27 +02:00
d9d7f6a6b9 Revert change in compile_friendly_resize (#40645)
fix
2025-09-18 16:25:45 +01:00
738b223f57 Add captured actual outputs to CI artifacts (#40965)
* fix

* fix

* Remove `# TODO: ???` as it make me `???`

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-18 15:40:53 +02:00
dd7ac4cd59 [tests] Really use small models in all fast tests (#40945)
* start

* xcodec

* chameleon

* start

* layoutlm2

* layoutlm

* remove skip

* oups

* timm_wrapper

* add default

* doc

* consistency
2025-09-18 15:24:12 +02:00
2ce35a248f Fix Issue #39030: AutoTokenizer.from_pretrained does not propagate token (#40956)
* fix merge conflicts

* change token typing

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-27-253.ec2.internal>
2025-09-18 13:22:19 +00:00
6e51ac31ef [timm_wrapper] better handling of "Unknown model" exception in timm (#40951)
* fix(timm): Add exception handling for unknown Gemma3n model

* nit: Let’s cater to this specific issue

* nit: Simplify error handling
2025-09-18 14:09:08 +01:00
9378f874c1 [Trainer] Fix DP loss (#40799)
* fix

* style

* Fix fp16

* style

---------

Co-authored-by: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>
2025-09-18 13:07:20 +00:00
7cf1f5ced0 Use skip_predictor=True in vjepa2 get_vision_features (#40966)
use skip_predictor in vjepa2 `get_vision_features`
2025-09-18 11:51:45 +00:00
f6104189fd Fix outdated version checks of accelerator (#40969)
* Fix outdated version checks of accelerator

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Fix outdated version checks of accelerator

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-18 11:49:14 +00:00
c532575795 Add new model LFM2-VL (#40624)
* Add LFM2-VL support

* add tests

* linting, formatting, misc review changes

* add siglip2 to auto config and instantiate it in lfm2-vl configuration

* decouple image processor from processor

* remove torch import from configuration

* replace | with Optional

* remove layer truncation from modeling file

* fix copies

* update everything

* fix test case to use tiny model

* update the test cases

* fix finally the image processor and add slow tests

* fixup

* typo in docs

* fix tests

* the doc name uses underscore

* address comments from Yoni

* delete tests and unsuffling

* relative import

* do we really handle imports better now?

* fix test

* slow tests

* found a bug in ordering + slow tests

* fix copies

* dont run compile test

---------

Co-authored-by: Anna <anna@liquid.ai>
Co-authored-by: Anna Banaszak <48625325+ankke@users.noreply.github.com>
2025-09-18 11:01:58 +00:00
564fde14f1 FIX(trainer): ensure final checkpoint is saved when resuming training (#40347)
* fix(trainer): ensure final checkpoint is saved when resuming training

* add test

* make style && slight fix of test

* make style again

* move test code to test_trainer

* remove outdated test file

* Apply style fixes

---------

Co-authored-by: rangehow <rangehow@foxmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-18 09:57:21 +00:00
5748352c27 Update expected values for one more test_speculative_generation after #40949 (#40967)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-18 11:47:14 +02:00
438343d93f Don't list dropout in eager_paged_attention_forward (#40924)
Remove dropout argument

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-18 09:05:50 +00:00
449da6bb30 Add FlexOlmo model (#40921)
* transformers add-new-model-like

* Add FlexOlmo implementation

* Update FlexOlmo docs

* Set default tokenization for flex olmo

* Update FlexOlmo tests

* Update attention comment

* Remove unneeded use of `sliding_window`
2025-09-18 09:04:06 +00:00
3bb1b4867c Standardize audio embedding function name for audio multimodal models (#40919)
* Standardize audio embedding function name for audio multimodal models

* PR review
2025-09-18 08:45:04 +00:00
58e13b9f12 Update expected values for some test_speculative_generation (#40949)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-17 20:50:38 +02:00
529d3a2b06 Fix Glm4vModelTest::test_eager_matches_fa2_generate (#40947)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-17 19:53:59 +02:00
a2ac4de8b0 Remove nested import logic for torchvision (#40940)
* remove nested import logic for torchvision

* remove unnecessary protected imports

* remove unnecessarry protected import in modular (and modeling)

* fix wrongly removed protected imports
2025-09-17 13:34:30 -04:00
8e837f6ae2 Consistent naming for images kwargs (#40834)
* use consistent naming for padding

* no validation on pad size

* add warnings

* fix

* fix copies

* another fix

* fix some tests

* fix more tests

* fix lasts tests

* fix copies

* better docstring

* delete print
2025-09-17 18:40:25 +02:00
eb04363a0d Raise error instead of warning when using meta device in from_pretrained (#40942)
* raise instead of warning

* add timm

* remove
2025-09-17 18:23:37 +02:00
ecc1d778ce Fix Glm4vMoeIntegrationTest (#40930)
* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-17 18:21:18 +02:00
c5553b4120 Fix trainer tests (#40823)
* fix liger

* fix

* more

* fix

* fix hp

* fix

---------

Co-authored-by: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>
2025-09-17 16:05:17 +00:00
14f01aee39 docs(i18n): Correct the descriptive text in the README_zh-hans.md (#40941) 2025-09-17 08:48:38 -07:00
26b65fb516 Intel CPU dockerfile (#40806)
* upload intel cpu dockerfile

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update cpu dockerfile

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update label name

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
2025-09-17 15:42:30 +00:00
66f97d3f64 [models] remove unused import torch.utils.checkpoint (#40934) 2025-09-17 16:37:56 +01:00
3853bfe4d5 [DOC] Add missing dates in model cards (#40922)
add missing dates
2025-09-17 11:17:06 -04:00
6cade29278 Add LongCat-Flash (#40730)
* working draft for LongCat

* BC changes to deepseek_v3 for modular

* format

* various modularities

* better tp plan

* better init

* minor changes

* make modular better

* clean up patterns

* Revert a couple of modular commits, because we won't convert in the end

* make things explicit.

* draft test

* toctree, tests and imports

* drop

* woops

* make better things

* update test

* update

* fixes

* style and CI

* convert stuff

* up

* ah, yes, that

* enable gen tests

* fix cache shape in test (sum of 2 things)

* fix tests

* comments

* re-Identitise

* minimize changes

* better defaults

* modular betterment

* fix configuration, add documentation

* fix init

* add integration tests

* add info

* simplify

* update slow tests

* fix

* style

* some additional long tests

* cpu-only long test

* fix last tests?

* urg

* cleaner tests why not

* fix

* improve slow tests, no skip

* style

* don't upcast

* one skip

* finally fix parallelism
2025-09-17 14:48:10 +02:00
48a5565179 Add support for Florence-2 training (#40914)
* Support training florence2

* update doc and testing model to florence-community

* fix florence-2 test, use head dim 16 instead of 8 for fa2

* skip test_sdpa_can_dispatch_on_flash

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-09-17 11:49:56 +00:00
89949c5d2d Minor fix for #40727 (#40929)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-17 11:42:13 +02:00
c830fc1207 Adding activation kernels (#40890)
* first commit

* add mode

* revert modeling

* add compile

* rm print
2025-09-17 11:36:09 +02:00
f6999b00c3 [torchao safetensors] renaming get_state_dict function (#40774)
renaming get_state_dict function

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-09-17 11:20:50 +02:00
8428c7b9c8 Fix #40067: Add dedicated UMT5 support to GGUF loader (config, tokenizer, test) (#40218)
* Fix #40067 : add UMT5 support in GGUF loader (config, tokenizer, test)

* chore: fix code formatting and linting issues

* refactor: move UMT5 GGUF test to quantization directory and clean up comments

* chore: trigger CI pipeline

* refactor(tests): Move UMT5 Encoder GGUF test to GgufModelTests. This consolidates the new test into the main class for consistency.

* Add regression check to UMT5 encoder GGUF test

Verify encoder output against reference tensor values with appropriate tolerances for stability.

* Update tests/quantization/ggml/test_ggml.py

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

* Update tests/quantization/ggml/test_ggml.py

remove comments

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

---------

Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-09-17 09:15:55 +00:00
ddd4caf066 [Llama4] Remove image_sizes arg and deprecate vision_feature_layer (#40832)
* Remove unused arg

* deprecate

* revrt one change

* get set go

* version correction

* fix

* make style

* comment
2025-09-17 09:14:13 +00:00
b82cd1c240 Processor load with multi-processing (#40786)
push
2025-09-17 09:46:49 +02:00
6e50a8afb2 [Docs] Adding documentation of MXFP4 Quantization (#40885)
* adding mxfp4 quantization docs

* review suggestions

* Apply suggestions from code review

Co-authored-by: vb <vaibhavs10@gmail.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: vb <vaibhavs10@gmail.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-16 11:31:28 -07:00
cccef4be91 Fix dtype in Paligemma (#40912)
* fix dtypes

* fix copies

* delete unused attr
2025-09-16 16:07:56 +00:00
beb09cbd5a 🔴Make center_crop fast equivalent to slow (#40856)
make center_crop fast equivalent to slow
2025-09-16 16:01:38 +00:00
d4af0d9f03 [generate] misc fixes (#40906)
misc fixes
2025-09-16 15:18:06 +01:00
3b3f6cd0c1 [gemma3] Gemma3ForConditionalGeneration compatible with assisted generation (#40791)
* gemma3vision compatible with assisted generation

* docstring

* BC

* docstring

* failing checks

* make fixup

* apply changes to modular

* misc fixes

* is_initialized

* fix poor rebase
2025-09-16 15:08:48 +01:00
88ba0f107e disable test_fast_is_faster_than_slow (#40909)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-16 15:34:04 +02:00
270da89708 Remove runner_map (#40880)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-16 15:18:07 +02:00
df03fc1f9c Improve module name handling for local custom code (#40809)
* Improve module name handling for local custom code

* Use `%lazy` in logging messages

* Revert "Use `%lazy` in logging messages"

This reverts commit 5848755d5805e67177c5218f351c0ac852df9340.

* Add notes for sanitization rule in docstring

* Remove too many underscores

* Update src/transformers/dynamic_module_utils.py

* Update src/transformers/dynamic_module_utils.py

---------

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
2025-09-16 13:11:48 +00:00
96bc19bcdf remove dummy EncodingFast (#40864)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-16 12:56:11 +00:00
d0af4269ec Add Olmo3 model (#40778)
* transformers add-new-model-like for Olmo3

* Implement modular Olmo3

* Update Olmo3 tests

* Copy Olmo2 weight converter to Olmo3

* Implement Olmo3 weight converter

* Fix code quality errors

* Remove unused import

* Address rope-related PR comments

* Update Olmo3 model doc with minimal details

* Fix Olmo3 rope test failure

* Fix 7B integration test
2025-09-16 13:28:23 +02:00
65f9ede359 Set seed for Glm4vIntegrationTest (#40905)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-16 13:01:51 +02:00
0c1839d609 [cache] Only use scalars in get_mask_sizes (#40907)
* remove tensor ops

* style

* style
2025-09-16 12:48:58 +02:00
3688a977d0 Harmonize CacheLayer names (#40892)
* unify naming

* style

* doc as well

* post rebase fix

* style

* style

* revert
2025-09-16 12:14:12 +02:00
087775d10e [cache] Merge static sliding and static chunked layer (#40893)
* merge

* get rid of tensors in get_mask_sizes!!

* remove branch

* add comment explanation

* re-add the class with deprecation cycle
2025-09-16 11:41:20 +02:00
1aff033ec9 Fix flaky Gemma3nAudioFeatureExtractionTest::test_dither (#40902)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-16 11:00:07 +02:00
65adc3aaa3 Fix getter regression (#40824)
* test things

* style

* move tests to a sane place
2025-09-16 10:57:13 +02:00
8e1a12bbee Fixing the call to kernelize (#40628)
* fix

* style

* overload train and eval

* add getter and setter
2025-09-16 10:50:54 +02:00
21c8379fb0 Make debugging failing tests (check and update expect output values) easier 🔥 (#40727)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-16 10:21:48 +02:00
5af248b3e3 [generate] remove docs of a feature that no longer exists (#40895) 2025-09-15 19:22:31 +01:00
20ee3a73f0 🌐 [i18n-KO] Translated imageprocessor.md to Korean (#39557)
* feat: manual translation

* docs: fix ko/_toctree.yml

* Apply suggestions from code review

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>
Co-authored-by: Yijun Lee <119404328+yijun-lee@users.noreply.github.com>

* Update docs/source/ko/image_processors.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: YONGSANG <71686691+4N3MONE@users.noreply.github.com>
Co-authored-by: Yijun Lee <119404328+yijun-lee@users.noreply.github.com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-15 10:07:16 -07:00
2141a5b764 🌐 [i18n-KO] Translated smolvlm.md to Korean (#40414)
* fix: manual edits

* Apply suggestions from code review

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/model_doc/smolvlm.md

* Update docs/source/ko/_toctree.yml

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-09-15 10:06:57 -07:00
2a83792165 Remove dict branch of attention_mask in sdpa_attention_paged_forward (#40882)
Remove dict branch of attention_mask

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-15 17:38:13 +02:00
04d1c8f3d4 Fix deta loading & dataclass (#40878)
* fix

* fix 2
2025-09-15 17:23:13 +02:00
ff26fe8302 Add Fast PromptDepthAnything Processor (#40602)
* Test & import setup

* First version passing tests

* Ruff

* Dummy post processing

* Add numerical test

* Adjust

* Doc

* Ruff

* remove unused arg

* Refine interpolation method and push test script

* update bench

* Comments

* Update src/transformers/models/auto/image_processing_auto.py

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>

* Remove benchmark script

* Update docstrings

* Update src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>

* Update src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>

* doc

* further process kwargs

* remove it

* remove

* Remove to dict

* remove crop middle

* Remove param specific handling

* Update testing logic

* remove ensure multiple of as kwargs

* fix formatting

* Remove none default and get image size

* Move stuff to _preprocess_image_like_inputs and refacto

* Clean

* ruff

* End of file & comments

* ruff again

* Padding fixed

* Remove comments to pass tests

* Remove prompt depth from kwargs

* Adjust output_size logic

* Docstring for preprocess

* auto_docstring for preprocess

* pass as an arg

* update test batched

* stack images

* remove prompt scale to meter

* return tensors back in preprocess

* remove copying of images

* Update behavior to match old processor

* Fix batch size of tests

* fix test and fast

* Fix slow processor

* Put tests back to pytorch

* remove check and modify batched tests

* test do_pad + slow processor fix

---------

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co>
2025-09-15 15:03:43 +00:00
6254bb4a68 Use torch.expm1 and torch.log1p for better numerical results (#40860)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-15 11:54:14 +00:00
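A small demonstration of the numerical difference, assuming float32 inputs near zero:

```python
import torch

x = torch.tensor([1e-10], dtype=torch.float32)

# Naive forms lose the small term because 1 + 1e-10 rounds to 1.0 in float32...
print((torch.exp(x) - 1).item())   # 0.0
print(torch.log(1 + x).item())     # 0.0

# ...while the fused ops preserve it.
print(torch.expm1(x).item())       # ~1e-10
print(torch.log1p(x).item())       # ~1e-10
```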
e674e9dadb Clarify passing is_causal in sdpa_attention_paged_forward (#40838)
* Correctly pass is_causal in sdpa_attention_paged_forward

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Add comment

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Improve comments

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert typing

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-15 11:51:22 +00:00
0957999f7f 🔴 Move variable output controls to _prepare_generation_config (#40715)
* move checks to validate steps where possible

* fix csm and other models that override _sample

* ops dia you again

* opsie

* joao review

* Move variable output controls to `prepare_inputs_for_generation`

* fix a bunch of models

* back to basics

* final touches
2025-09-15 11:08:00 +00:00
5e9ec59d0c Fix modular consistency (#40883)
* reapply modular

* add missing one
2025-09-15 13:07:08 +02:00
3442b2f300 [VaultGemma] Update expectations in integration tests (#40855)
* fix tests

* style
2025-09-15 12:46:30 +02:00
c0dbe095b0 Adding Support for Qwen3-VL Series (#40795)
* add qwen3vl series

* make fixup

* fix import

* re-protect import

* fix it finally (need to merge main into the branch)

* skip processor test (need the checkpoint)

* oups typo

* simplify modular

* remove unnecessary attr

* fix layer

* remove unused rope_deltas args

* reuse image def

* remove unnecessary imports

---------

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
2025-09-15 12:46:18 +02:00
fc5f9105da [Qwen3 Next] Use numerically stable rsqrt (#40848)
use numerically stable inverse
2025-09-15 12:45:13 +02:00
96d3795cfc Update model tags and integration references in bug report (#40881) 2025-09-15 12:08:29 +02:00
f5e1641857 fix: XIELU act parameters not being casted to correct dtype (#40812) 2025-09-15 11:05:55 +02:00
ada64ce452 fix florence kwargs (#40826) 2025-09-15 11:05:47 +02:00
93f810e6fa [docstrings / type hints] Update outdated annotations for past_key_values (#40803)
* some fixes

* nits

* indentation

* indentation

* a bunch of type hints

* bulk changes
2025-09-15 10:52:32 +02:00
c65fea0b92 [Bug fix #40813] Fix base_model_tp_plan of Starcoder2 model. (#40814)
Signed-off-by: greg-kwasniewski1 <213329731+greg-kwasniewski1@users.noreply.github.com>
2025-09-15 10:46:32 +02:00
9c804f7ec4 Redirect MI355 CI results to dummy dataset (#40862) 2025-09-14 18:42:49 +02:00
02ea2b3433 Fix TrainingArguments.parallelism_config NameError with accelerate<1.10.1 (#40818)
Fix ParallelismConfig type for accelerate < 1.10.1

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-14 15:35:42 +00:00
d42e96a2a7 Use checkpoint in auto_class_docstring (#40844)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-13 00:49:19 +00:00
6eb3255842 [generate] Always use decoder config to init cache (#40772)
* mega derp

* fix

* always use the decoder
2025-09-12 18:24:22 +02:00
e682f90f60 [tests] move generative tests away from test_modeling_common.py (#40854)
move tests
2025-09-12 16:12:27 +00:00
8d8459132a [test] Fix test_eager_matches_sdpa incorrectly skipped (#40852)
* ouput_attentions in typed kwargs

* correct typing in GenericForTokenClassification

* improve
2025-09-12 18:07:48 +02:00
291772b6b5 add: differential privacy research model (#40851)
* VaultGemma

* Removing Sequence and Token classification models. Removing integration tests for now

* Remove pass-only modular code. style fixes

* Update vaultgemma.md

* Update docs/source/en/model_doc/vaultgemma.md

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Update docs/source/en/model_doc/vaultgemma.md

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>

* Add links to model doc

* Correct model doc usage examples

* Updating model doc to describe differences from Gemma 2

* Update model_doc links

* Adding integration tests

* style fixes

* repo consistency

* attribute exception

---------

Co-authored-by: Amer <amersinha@gmail.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
2025-09-12 17:36:03 +02:00
8502b41bf1 [Sam2Video] Fix video inference with batched boxes and add test (#40797)
fix video inference with batched boxes and add test
2025-09-12 14:33:28 +00:00
f384bb8ad5 [SAM2] Fix inconsistent results with original implementation with input boxes (#40800)
* Fix inconsistencies with box input inference with original repo

* remove print

* always pad

* fix modular
2025-09-12 14:21:22 +00:00
4cb41ad2a2 [tests] re-enable aria fast tests (#40846)
* rise from the dead

* test
2025-09-12 15:14:54 +01:00
ef053939ca Fixes for continuous batching (#40828)
* Fix for CB attn mask and refactor

* Tests for CB (not all passing)

* Passing tests and a logger fix

* Fixed the KV metrics that were broken when we moved to hybrid alloc

* Fix circular import and style

* Added tests for FA

* Unfolded test to have device expectations

* Fixes for H100

* more fixes for h100

* H100 are good

* Style

* Adding some comments from #40831

* Rename test

* Avoid 1 letter variables

* Dictionary is only removed during kwargs

* Test for supported sample

* Fix an involuntary slice

* Fixes for non-sliced inputs and small example improvements

* Slicing inputs is more understandable

* Style
2025-09-12 15:35:31 +02:00
98a8078127 Fix the misalignment between the l2norm in GDN of Qwen3-Next and the implementation in the FLA library. (#40842)
* align torch implementation of gdn with fla.

* fix fla import.

* fix

* remove unused attr

* fixes

* strictly align l2norm in Qwen3-Next with FLA implementation.

---------

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-09-12 14:08:01 +02:00
77aa35ee9c Replace image classification loss functions to self.loss_function (#40764) 2025-09-12 12:59:37 +01:00
797859c9b8 Update no split modules in T5Gemma model (#40810)
* Update no split modules in T5Gemma model

* Update no_split_modules also for T5Gemma modular

* Remove model_split_percents from test cases

---------

Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
2025-09-12 10:44:57 +00:00
6e69b60806 Adds Causal Conv 1D kernel for mamba models (#40765)
* add kernel

* make style

* keep causal-conv1d

* small fix

* small fix

* fix modular converter

* modular fix + lazy loading

* revert changes modular

* nit

* hub kernels update

* update

* small nit
2025-09-12 12:22:25 +02:00
827b65c42c Add VideoProcessors to auto-backend requirements (#40843)
* add it

* fix existing ones

* add perception to auto_mapping...
2025-09-12 12:21:12 +02:00
5e2e77fb45 Improve torch_dtype checks (#40808)
* Improve torch_dtype checks

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Apply suggestions from code review

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
2025-09-12 09:57:59 +00:00
c81f426f9a 🌐 [i18n-KO] Translated clipseg.md to Korean (#39903)
* docs: ko: model_doc/clipseg.md

* fix: manual edits

* Apply suggestions from code review

Co-authored-by: Kim Juwon <81630351+Kim-Ju-won@users.noreply.github.com>

---------

Co-authored-by: Kim Juwon <81630351+Kim-Ju-won@users.noreply.github.com>
2025-09-11 17:07:24 -07:00
cf084f5b40 [Jetmoe] Fix RoPE (#40819)
* fix

* remove prints

* why was this there...
2025-09-11 18:41:11 +02:00
dfae7dd98d Push generation config along with checkpoints (#40804) 2025-09-11 17:33:16 +02:00
c264c0ee7e add general hub test for Fast Image Processors in test_image_processing_utils (#40086)
* build unittest for ViTImageProcessorFast

* remove redundant test case

---------

Co-authored-by: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com>
2025-09-11 14:31:37 +00:00
895b3ebe41 Fix typos in src (#40782)
Fix typos in src

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-11 13:15:15 +01:00
6d369124ad Align torch implementation of Gated DeltaNet in Qwen3-Next with fla library. (#40807)
* align torch implementation of gdn with fla.

* fix fla import.

* fix

* remove unused attr

* fixes

---------

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-09-11 13:10:15 +02:00
0f1b128d33 ⚠️ 🔴 Add ministral model (#40247)
* add ministral model

* docs, tests

* nits

* fix tests

* run modular after merge

* opsie

* integration tests

* again

* fff

* dtype

* rerun modular

* arthur review

* ops

* review
2025-09-11 10:30:39 +02:00
02f1d7c091 Fix config dtype parsing for Emu3 edge case (#40766)
* fix emu3 config

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

* address comment

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

* add comments

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

---------

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2025-09-11 08:26:45 +00:00
de01a22aff Fix edge case for tokenize (#36277) (#36555)
* Fix edge case for tokenize (#36277)

* Fix tokenizing dtype for float input cases

* add test for empty input string

* deal empty list of list like [[]]

* add tests for tokenizer for models with input that is not plain text
2025-09-11 09:57:30 +02:00
ec532f20fb feature: Add robust token counting with padding exclusion (#40416)
* Implemented robust token counting by reusing the existing include_num_input_tokens_seen variable; kept the bool form for backward compatibility, added a string option, and left the default unchanged. Also added robust test cases.

* Resolved a mismatch between my local and remote branches and fixed a code quality issue

* ci: retrigger tests

* another attempt to trigger CI for checks
2025-09-11 09:16:06 +02:00
df67cd35f0 Fix DeepSpeed mixed precision precedence over Accelerate defaults (#39856)
* Fix DeepSpeed mixed precision precedence over Accelerate defaults

Resolves issue where Accelerate would default to bf16 mixed precision
when a DeepSpeed config specifies fp16, causing a ValueError. The fix
ensures DeepSpeed config takes precedence over TrainingArguments defaults
while preserving explicit user settings.

Changes:
- Add override_training_args_from_deepspeed() method to handle config precedence
- Reorder mixed precision environment variable setting in TrainingArguments
- Ensure DeepSpeed fp16/bf16 settings override defaults but not explicit choices

Fixes #39849

* Add tests for DeepSpeed mixed precision precedence fix

- Add TestDeepSpeedMixedPrecisionPrecedence class with 3 focused tests
- Test DeepSpeed fp16/bf16 config overriding TrainingArguments defaults
- Test user explicit settings being preserved over DeepSpeed config
- Test precedence hierarchy: user settings > DeepSpeed config > defaults
- Replace massive 934-line test bloat with concise 50-line test suite
- Tests cover core functionality of PR #39856 mixed precision precedence fix
2025-09-11 09:12:15 +02:00
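A hypothetical sketch of the precedence rule described above (user settings > DeepSpeed config > defaults); this is not the actual Trainer code, and the function and argument names are made up for illustration:

```python
def resolve_mixed_precision(user_fp16, user_bf16, ds_config):
    # Explicit user choices always win.
    if user_fp16 is not None or user_bf16 is not None:
        return bool(user_fp16), bool(user_bf16)
    # Otherwise the DeepSpeed config decides, instead of falling back to a bf16 default.
    fp16 = ds_config.get("fp16", {}).get("enabled", False)
    bf16 = ds_config.get("bf16", {}).get("enabled", False)
    return fp16, bf16

print(resolve_mixed_precision(None, None, {"fp16": {"enabled": True}}))  # (True, False)
print(resolve_mixed_precision(None, True, {"fp16": {"enabled": True}}))  # (False, True)
```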
549ba5b8b6 [Docs] Add missing class documentation for optimizer_schedules (#31870, #23010) (#40761)
* Add missing class documentation for optimizer_schedules (#31870, #23010)

* Add section level header to the optimizer schedules
2025-09-10 14:58:21 -07:00
dae1ccfb98 fix_image_processing_fast_for_glm4v (#40483)
* fix_image_processing_fast_for_glm4v

* fix(format): auto-ruff format

* add test image processing glm4v

* fix quality

---------

Co-authored-by: Your Name <you@example.com>
Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co>
2025-09-10 21:05:27 +00:00
7d57b31e16 Remove use_ipex option from Trainer (#40784)
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-10 17:00:15 +00:00
3378e7dabf Move num_items_in_batch to correct device before accelerator.gather (#40773)
add device
2025-09-10 18:49:42 +02:00
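A minimal sketch of the pattern the one-line fix targets, assuming an accelerate Accelerator is in use:

```python
import torch
from accelerate import Accelerator

accelerator = Accelerator()

# The counter starts life on CPU; moving it to the accelerator's device first
# keeps accelerator.gather happy in multi-device runs.
num_items_in_batch = torch.tensor(128)
num_items_in_batch = num_items_in_batch.to(accelerator.device)
total_items = accelerator.gather(num_items_in_batch).sum()
print(total_items.item())
```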
e5ecb03c92 Fix the issue that csm model cannot work with pipeline mode. (#39349)
* Fix the issue that csm model cannot work with pipeline mode.

Signed-off-by: yuanwu <yuan.wu@intel.com>

* Remove batching inference

Signed-off-by: yuanwu <yuan.wu@intel.com>

* csm output is list of tensor

Signed-off-by: yuanwu <yuan.wu@intel.com>

* Update src/transformers/pipelines/text_to_audio.py

Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>

* Use different waveform key for different model

Signed-off-by: yuanwu <yuan.wu@intel.com>

* Fix make style errors

Signed-off-by: yuanwu <yuan.wu@intel.com>

* Add csm tests

Signed-off-by: yuanwu <yuanwu@habana.ai>

* Update src/transformers/models/auto/tokenization_auto.py

---------

Signed-off-by: yuanwu <yuan.wu@intel.com>
Signed-off-by: yuanwu <yuanwu@habana.ai>
Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
2025-09-10 16:17:35 +00:00
abbed7010b Fix dotted model names (#40745)
* Fix module loading for models with dots in names

* quality check

* added test

* wrong import

* Trigger CI rerun after making test model public

* Update src/transformers/dynamic_module_utils.py

* Update tests/utils/test_dynamic_module_utils.py

* Update tests/utils/test_dynamic_module_utils.py

* Move test

* make fixup

---------

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
Co-authored-by: Matt <rocketknight1@gmail.com>
2025-09-10 14:34:56 +00:00
75202b0928 Read config pattern for Qwen3Next (#40792)
read it
2025-09-10 15:18:51 +02:00
7401cfa57c Use functools.cached_property (#40607)
* cached_property is avaiable in functools

Signed-off-by: cyy <cyyever@outlook.com>

* Remove cached_property

Signed-off-by: cyy <cyyever@outlook.com>

* Fix docs

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: cyy <cyyever@outlook.com>
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-10 12:15:40 +00:00
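For context, the standard-library decorator the PR switches to; the class here is just an illustration:

```python
from functools import cached_property

class Config:
    def __init__(self, path: str):
        self.path = path

    @cached_property
    def payload(self) -> dict:
        # Computed once on first access and then stored on the instance,
        # so no hand-rolled cached_property helper is needed.
        print("loading", self.path)
        return {"path": self.path}

cfg = Config("config.json")
cfg.payload  # prints "loading config.json"
cfg.payload  # served from cache, no print
```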
8ab2448707 Fix invalid PipelineParallel member (#40789)
Fix invalid enum member

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-10 12:06:36 +00:00
6c9f412105 Fix typos in tests and util (#40780)
Fix typos

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-10 11:45:40 +00:00
0997c2f2ab Fix doc for PerceptionLMForConditionalGeneration forward. (#40733)
* Fix doc for PerceptionLMForConditionalGeneration forward.

* fix last nit

---------

Co-authored-by: raushan <raushan@huggingface.co>
2025-09-10 11:57:19 +02:00
a72e5a4b9d 🚨 Fix Inconsistant input_feature length and attention_mask length in WhisperFeatureExtractor (#39221)
* Update feature_extraction_whisper.py

* Reformat

* Add feature extractor shape test

* reformat

* fix omni

* fix new failing whisper test

* Update src/transformers/models/whisper/feature_extraction_whisper.py

* make style

* revert omni test changes

* add comment

---------

Co-authored-by: lvyuanjun.lyj <lvyuanjun.lyj@alibaba-inc.com>
Co-authored-by: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
Co-authored-by: Vasqu <antonprogamer@gmail.com>
Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
Co-authored-by: Eustache Le Bihan <eulebihan@gmail.com>
2025-09-10 09:38:47 +00:00
a5ecd94a3f Enable ruff on benchmark and scripts (#40634)
* Enable ruff on benchmark and scripts

Signed-off-by: cyy <cyyever@outlook.com>

* Cover benchmark_v2

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* correct

* style

* style

---------

Signed-off-by: cyy <cyyever@outlook.com>
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-09-10 11:38:06 +02:00
08edec9f7d [processors] Unbloating simple processors (#40377)
* modularize processor - step 1

* typos

* why raise error, super call check it also

* tiny update

* fix copies

* fix style and test

* lost an import / fix copies

* fix tests

* oops deleted accidentally
2025-09-10 10:37:19 +02:00
c52889bd51 Remove reference of video_load_backend and video_fps for processor (#40719)
* Remove reference of video_load_backend and video_fps for processor

Signed-off-by: cyy <cyyever@outlook.com>

* Restore changes

Signed-off-by: cyy <cyyever@outlook.com>

---------

Signed-off-by: cyy <cyyever@outlook.com>
2025-09-10 08:37:11 +00:00
3340ccbd40 Fix gpt-oss router_indices in EP (#40545)
* fix out shape

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix router indices

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix mod

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix masking

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix format

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add safety checking

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix checking

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* enable 1 expert per rank

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix skip

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add ep plan in config

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add update ep plan

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix typo

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* rm ep_plan and add comments

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
2025-09-10 10:30:55 +02:00
b9282355be Adding Support for Qwen3-Next (#40771)
* Add Qwen3-Next.

* fix

* style

* doc

* simplify

* fix name

* lazy cache init to allow multi-gpu inference

* simplify

* fix config to support different hybrid ratio.

* remove last commit (redundant)

* tests

* fix test

---------

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-09-09 23:46:57 +02:00
79fdbf2a4a [docs] CPU install (#40631)
* init

* feedback
2025-09-09 12:51:54 -07:00
37c14430c9 [pipeline] ASR pipeline kwargs are forwared to generate (#40375)
* tmp commit

* add test

* PR suggestion
2025-09-09 17:29:25 +00:00
d09fdf5e52 Fix crash when executing MambaCache sample code (#40557)
* Fix the sample code of MambaCache

* Update automatically generated code

* Fix FalconMambaCache documents

* minor doc fixes

---------

Co-authored-by: Joao Gante <joao@huggingface.co>
2025-09-09 16:44:49 +00:00
d33c189e5a [RoPE] run RoPE tests when the model uses RoPE (#40630)
* enable rope tests

* no manual rope test parameterization

* Apply suggestions from code review

* Update tests/models/hunyuan_v1_dense/test_modeling_hunyuan_v1_dense.py

* PR comment: use generalist torch code to find the rope layer
2025-09-09 17:11:02 +01:00
71ac7ea048 [tests] update test_past_key_values_format and delete overwrites (#40701)
* tmp

* rm some overwrites
2025-09-09 16:40:04 +01:00
7aaef98cbe rm src/transformers/convert_pytorch_checkpoint_to_tf2.py (#40718)
* rm src/transformers/convert_pytorch_checkpoint_to_tf2.py

* doctest skip
2025-09-09 16:34:54 +01:00
de5cbe8b79 [deprecations] Remove generate-related deprecations up to v4.56 (#40729)
remove generate-related deprecations up to v4.56
2025-09-09 16:32:41 +01:00
1cdbbb3e9d Support sliding window in CB (#40688)
* CB example: better compare feature

* Cache managers, still issue w/ effective length

* WIP -- fix for effective length

* Renames

* Working, need better parity checks, we might be missing 1 token

* Small fixes

* Fixed wrong attn mask and broke cache into pieces

* Warmup is slowing down things, disabling it

* Cache was too big, fixed

* Simplified index objects

* Added a profile option to the example

* Avoid calls to memory reporting tools

* Restore full attention read indices for better latency

* Addressed some TODOs and style

* Docstrings for cache managers

* Docstrings for Schedulers

* Refactor schedulers

* [Important] Cache fix for sliding window, check with small sw size

* Updated doc for cache memory compute and cache as a whole

* Moved a todo

* Nits and style

* Fix for when sliding window is smaller than max batch per token

* Paged interface update

* Support for Flash in new API

* Fix example CB

* Fix bug in CB for paged

* Revert example

* Style

* Review compliance

* Style

* Styleeeee

* Removed NO_SLIDING_WINDOW

* Review #2 compliance

* Better art

* Turn cum_seqlens_k in a dict

* Attn mask is now a dict

* Update examples/pytorch/continuous_batching.py

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>

* Addressed McPatate's PR review

* Style and fix

---------

Co-authored-by: Luc Georges <McPatate@users.noreply.github.com>
2025-09-09 15:51:11 +02:00
ed100211cb [generate] PromptLookupCandidateGenerator won't generate forbidden tokens (#40726)
* no longer flaky :)

* PR comments

* any token-blocking logits processor works

* ?

* default

* -_-

* create fake tensors once
2025-09-09 11:04:01 +00:00
82d66e5dd0 Fix: swanlab public.cloud.experiment_url api error (#40763)
fix
2025-09-09 09:28:13 +00:00
a871f6f58d Add EfficientLoFTRImageProcessorFast for GPU-accelerated image processing (#40215)
* Add EfficientLoFTRImageProcessorFast for GPU-accelerated image processing

* Fix fast processor output format and add comprehensive tests

* Fix trailing whitespace in test file

* Apply ruff formatting to test file

* simplify pair validation logic

* add superglue tests to fast image processor

---------

Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co>
2025-09-08 21:08:02 +00:00
aee5000f16 Fix Bark failing tests (#39478)
* Fix vocab size for Bark generation.

* Fix Bark processor tests.

* Fix style.

* Address comments.

* Fix formatting.

---------

Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
2025-09-08 20:24:51 +02:00
126264d015 🌐 [i18n-KO] Translated 'xclip.md' to Korean (#39594)
* feat: nmt draft

* fix: manual edits

* docs: ko: xclip.md

* feat: nmt draft

* fix: manual edits

* fix: Modify _toctree.yml file to reflect review

* fix: Modify _toctree.yml file to reflect review

* jungnerd_suggestion_modified_01 ko_xclip.md

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>

* jungnerd_suggestion_modified_02 ko_xclip.md

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>

---------

Co-authored-by: Woojun Jung <46880056+jungnerd@users.noreply.github.com>
2025-09-08 11:19:10 -07:00
5a468e56b7 Fix continue_final_message in apply_chat_template to prevent substring matching issues (#40732)
* Fix continue_final_message parameter in apply_chat_template

* after run fixup

* Handle trim in the template

* after fixup

* Update src/transformers/utils/chat_template_utils.py

---------

Co-authored-by: Matt <Rocketknight1@users.noreply.github.com>
2025-09-08 17:25:12 +00:00
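A rough illustration of the continue_final_message flag touched by the fix above: when the last turn is a partial assistant message, the flag keeps that turn open so generation continues it instead of starting a new one. The model id and messages are placeholders.

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # placeholder chat model
    messages = [
        {"role": "user", "content": "Write a haiku about autumn."},
        {"role": "assistant", "content": "Golden leaves drift down"},  # partial reply to be continued
    ]
    # continue_final_message=True leaves the final assistant turn unterminated in the rendered prompt
    prompt = tok.apply_chat_template(messages, continue_final_message=True, tokenize=False)
    print(prompt)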
e8db153599 Fix inconsistency in SeamlessM4T and SeamlessM4Tv2 docs (#39364) 2025-09-08 10:01:44 -07:00
fd2a29d468 Fix more typos (#40627)
Fix typos

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-08 16:05:40 +00:00
bb8e9cd675 Remove unnecessary tildes from documentation (#40748) 2025-09-08 08:56:35 -07:00
a9b313a0c2 docs: add continuous batching to serving (#40758)
* docs: tmp

* docs: add continuous batching to serving

* docs: reword after @lysandrejik review
2025-09-08 15:50:28 +00:00
2077f17547 feat: err when unsupported attn impl is set w/ --continuous_batching (#40618)
* feat: err when unsupported attn impl is set w/ `--continuous_batching`

* refactor: move defaults and support list to CB code

* feat: add action item in error msg

* fix(serve): add default attn implementation

* feat(serve): add log when `attn_implementation` is `None`

* feat: raise Exception when attn_implementation is not supported by CB
2025-09-08 14:31:49 +00:00
dc262ee6f5 remove FSDP prefix when using save_pretrained with FSDP2 (#40207)
* remove FSDP prefix when using save_pretrained with FSDP2

* Fix: use removeprefix correctly

---------

Co-authored-by: Matej Sirovatka <54212263+S1ro1@users.noreply.github.com>
Co-authored-by: S1ro1 <matej.sirovatka@gmail.com>
2025-09-08 14:52:31 +02:00
9ab6078323 remove gemmas eager training warning (#40744)
* removed warning

* removed remaining warnings
2025-09-08 14:41:52 +02:00
2a1eb5b508 Add BF16 support check for MUSA backend (#40576)
add musa bf16 supported

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-08 12:39:14 +00:00
7b8d40ea7a Set accepts_loss_kwargs to False for ConvNext(|V2)ForImageClassification (#40746) 2025-09-08 14:25:43 +02:00
def7558f74 Fix np array typing (#40741)
Fix typing

Signed-off-by: cyy <cyyever@outlook.com>
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-09-08 11:30:40 +00:00
44b3888d2a Fix order of mask functions when using and/or_mask_function (#40753)
fix order
2025-09-08 12:31:42 +02:00
3f7bda4209 [Continuous Batching] fix do_sample=True in continuous batching (#40692)
* fix do_sample=True in continuous batching

* added test

* fix top_p

* test

* Update examples/pytorch/continuous_batching.py
2025-09-08 10:30:15 +02:00
bb45d3631e refactor(serve): move request_id to headers (#40722)
* refactor(serve): move `request_id` to headers

* fix(serve): typo in middleware fn name

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
2025-09-05 17:50:04 +02:00
12b8e10dbf Skip VitMatteImageProcessingTest::test_fast_is_faster_than_slow (#40713)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-05 17:36:20 +02:00
6b232618b6 Keypoint matching docs (#40541)
---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Co-authored-by: StevenBucaille <steven.bucaille@gmail.com>
2025-09-05 17:24:56 +02:00
948bc0fa34 [Gemma Embedding] Fix SWA (#40700)
* fix gemma embedding flash attention

* fix sdpa

* fix atttempt number 2

* alternative gemma fix

* fix modular
2025-09-05 17:12:00 +02:00
828044cadb Add Optional typing (#40686)
* Add Optional typing

Signed-off-by: cyy <cyyever@outlook.com>

* Fix typing

Signed-off-by: cyy <cyyever@outlook.com>

* Format

Signed-off-by: cyy <cyyever@outlook.com>

---------

Signed-off-by: cyy <cyyever@outlook.com>
2025-09-05 15:05:51 +00:00
e9d6a6907b [tests] remove overwrites of removed test (#40720)
rm tests from method moved to hub
2025-09-05 16:04:22 +01:00
96a5774f2e [serve] re-enable tests (#40717)
run tests
2025-09-05 15:15:34 +01:00
c76387e580 Fix arguments (#40605)
* Fix invalid arguments

Signed-off-by: cyy <cyyever@outlook.com>

* Fix typing

Signed-off-by: cyy <cyyever@outlook.com>

* Add missing self

Signed-off-by: cyy <cyyever@outlook.com>

* Add missing self and other fixes

Signed-off-by: cyy <cyyever@outlook.com>

*  More fixes

Signed-off-by: cyy <cyyever@outlook.com>

*  More fixes

Signed-off-by: cyy <cyyever@outlook.com>

---------

Signed-off-by: cyy <cyyever@outlook.com>
2025-09-05 13:50:04 +00:00
21f09032db 🔴 Update Glm4V to use config values (#40712)
* update to use config

* just fix it

* fixup want this to be reformatted
2025-09-05 13:19:50 +00:00
b62e5b6051 Fix parent classes of AllKwargsForChatTemplate (#40685)
Fix parent classes of AllKwargsForChatTemplate because the *Kwargs are members

Signed-off-by: cyy <cyyever@outlook.com>
2025-09-05 11:08:51 +00:00
313effa7ad [onnx] use logical or for grounding dino mask (#40625)
* change |= operator to use torch logical or for friendly export to different backends

* change |= operator to use torch logical or for friendly export to different backends in grounding dino model

---------

Co-authored-by: Lewis Marshall <lewism@elderda.co.uk>
2025-09-05 10:55:20 +00:00
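A minimal sketch of the kind of change described above: replacing the in-place |= update with an explicit torch.logical_or, which tends to export more cleanly to ONNX and other backends.

    import torch

    mask = torch.zeros(4, dtype=torch.bool)
    update = torch.tensor([True, False, True, False])

    # before: in-place bitwise-or, which some exporters handle poorly
    # mask |= update

    # after: explicit logical or, friendlier for ONNX and similar backends
    mask = torch.logical_or(mask, update)
    print(mask)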
f3211b5db7 [moduar] Add missing self in post-process methods (#40711) 2025-09-05 10:49:52 +00:00
a2a8a3ca1e [tests] fix blip2 edge case (#40699) 2025-09-05 11:35:29 +01:00
4e195f1949 🚨 Allow check_model_inputs in core VLMs (#40342)
* allow `check_model_inputs` in core VLMs

* address comments

* fix style

* why didn't this fail previously?

* check for Noneness instead

* batch update vlms

* fix some tests

* fix copies

* oops delete

* fix efficientloftr

* fix copies

* i am stupid, fix idefics

* fix GC

* return type and other comments

* we shouldn't manually change attention anymore

* fix style

* fix copies

* fix the test
2025-09-05 10:05:56 +00:00
93df343def Fix parent classes of ProcessingKwargs (#40676)
Fix parent classes of ProcessingKwargs

Signed-off-by: cyy <cyyever@outlook.com>
2025-09-05 10:01:16 +00:00
89e103c15e feat(serve): add healthcheck test (#40697) 2025-09-05 11:56:34 +02:00
a2fffa505d Fetch more test data with hf_hub_download (#40710)
[test-all] tests

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-05 09:49:31 +00:00
4a88e81532 Add Fast Image Processor for ImageGPT (#39592)
* initial commit

* initial setup

* Overriding ImageGPT-specific functions

* imported is_torch_available and utilized it for importing torch in imageGPT fast

* Created init and ImageGPTFastImageProcessorKwargs

* added return_tensors, data_format, and input_data_format to ImageGPTFastImageProcessorKwargs

* set up arguments and process and _preprocess definitions

* Added arguments to _preprocess

* Added additional optional arguments

* Copied logic over from base imageGPT processor

* Implemented 2nd draft of fast imageGPT preprocess using batch processing

* Implemented 3rd draft of imageGPT fast _preprocessor. Pulled logic from BaseImageProcessorFast

* modified imageGPT test file to properly run fast processor tests

* converts images to torch.float32 from torch.uint8

* fixed a typo with self.image_processor_list in the imagegpt test file

* updated more instances of image_processing = self.image_processing_class in the test file to test fast processor

* standardized normalization to not use image mean or std

* Merged changes from solution2 branch

* Merged changes from solution2 test file

* fixed testing through baseImageGPT processor file

* Fixed check_code_quality test. Removed unnecessary list comprehension.

* reorganized imports in image_processing_imagegpt_fast

* formatted image_processing_imagegpt_fast.py

* Added arg documentation

* Added FastImageProcessorKwargs class + Docs for new kwargs

* Reformatted previous

* Added F to normalization

* fixed ruff linting and cleaned up fast processor file

* implemented requested changes

* fixed ruff checks

* fixed formatting issues

* fix(ruff after merging main)

* simplify logic and reuse standard equivalence tests

---------

Co-authored-by: Ethan Ayaay <ayaayethan@gmail.com>
Co-authored-by: chris <christine05789@gmail.com>
Co-authored-by: Ethan Ayaay <98191976+ayaayethan@users.noreply.github.com>
Co-authored-by: yonigozlan <yoni.gozlan@huggingface.co>
2025-09-04 22:45:06 +00:00
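A hedged usage sketch for the fast processor added above; use_fast selects the torch-backed implementation when it is available, and the checkpoint name is only illustrative.

    from PIL import Image
    from transformers import AutoImageProcessor

    processor = AutoImageProcessor.from_pretrained("openai/imagegpt-small", use_fast=True)
    image = Image.new("RGB", (64, 64))  # dummy image
    inputs = processor(images=image, return_tensors="pt")
    print({k: v.shape for k, v in inputs.items()})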
9db11b728b Fetch one missing test data (#40703)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 23:05:23 +02:00
acd820561f Align assisted generate for unified signature in decoding methods (#40657)
* Squashed previous branch

* unify assisted generate to common decoding method signature

* move checks to validate steps where possible

* fix csm and other models that override _sample

* ops dia you again

* opsie

* joao review
2025-09-04 22:47:44 +02:00
16b821c542 Avoid T5GemmaModelTest::test_eager_matches_sdpa_inference being flaky (#40702)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 20:44:40 +00:00
519c2524af Fix broken Llama4 accuracy in MoE part (#40609)
* Fix broken Llama4 accuracy in MoE part

Llama4 accuracy is broken by a bug in
https://github.com/huggingface/transformers/pull/39501 . It forgot to
transpose the router_scores before applying it to routed_in, causing
Llama4 to generate garbage output.

This PR fixes that issue by adding back the transpose() and adding some
comments explaining why the transpose() is needed.

Signed-off-by: Po-Han Huang <pohanh@nvidia.com>

* remove comment

---------

Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-09-04 22:14:44 +02:00
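A toy illustration (not the actual Llama4 modeling code) of why the transpose matters: under assumed shapes, the router scores come out transposed relative to the layout of the routed hidden states, so they must be transposed and flattened before scaling.

    import torch

    num_tokens, num_experts_per_tok, hidden = 4, 2, 8
    # assumed layout: routed hidden states flattened over (tokens, experts)
    routed_in = torch.randn(num_tokens * num_experts_per_tok, hidden)
    # assumed layout: router scores produced as (experts, tokens)
    router_scores = torch.rand(num_experts_per_tok, num_tokens)

    # without the transpose the scores would not line up with routed_in's ordering;
    # transposing and flattening restores the per-token ordering before scaling
    scaled = routed_in * router_scores.transpose(0, 1).reshape(-1, 1)
    print(scaled.shape)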
586dc5d06e [Glm4.5V] fix vLLM support (#40696)
* fix

* add a test case
2025-09-04 22:09:20 +02:00
ad2da3ea83 Fix self.dropout_p is not defined for SamAttention/Sam2Attention (#40667)
Fix dropout_p is not defined for SamAttention/Sam2Attention
2025-09-04 19:32:39 +02:00
e39f222096 Fix backward compatibility with accelerate in Trainer (#40668) 2025-09-04 18:15:15 +02:00
d8f670583e Change docker image to preview for the MI355 CI (#40693)
* Change docker image to preview for the MI355 CI

* Use pushed image
2025-09-04 17:23:09 +02:00
4cbca0d1af Fixing bug in Voxtral when merging text and audio embeddings (#40671)
* Fixing bug when replacing text-audio token placeholders with audio embeddings

* apply changes

---------

Co-authored-by: Eustache Le Bihan <eulebihan@gmail.com>
Co-authored-by: eustlb <94853470+eustlb@users.noreply.github.com>
2025-09-04 15:11:23 +00:00
9a6c6568db feat: support request cancellation (#40599)
* feat: support request cancellation

* test: add cancellation test

* refactor: use existing fn to check req cancellation

* feat(cb): make cancellation thread safe

* refactor(serve): update test to use `requests` instead of `httpx`
2025-09-04 17:01:29 +02:00
87f38dbfce add: embedding model (#40694)
* Gemma 3 for Embeddings

* Style fixes

* Rename conversion file for consistency

* Default padding side emb vs gen

* Corrected 270m config

* style fixes

* EmbeddingGemma config

* TODO for built-in prompts

* Resolving the sentence similarity bug and updating the architecture

* code style

* Add query prompt for SentenceTransformers

* Code quality

* Fixing or_mask_function return types

* Adding placeholder prompts for document and passage

* Finalizing prompt templates

* Adding Retrieval to preconfigured prompts

* Add Gemma 3 270M Config

* Correcting num_linear_layers flag default

* Export Sentence Transformer in correct dtype

---------

Co-authored-by: Sindhu Raghuram <sindhuraghuram@google.com>
2025-09-04 16:16:15 +02:00
5b0c01b5e2 Final test data cache - inside CI docker images (#40689)
* run

* build

* build

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 13:12:49 +00:00
1f3cc935cc Load a tiny video to make CI faster (#40684)
* load a tiny video to make CI faster

* add video in url_to_local_path
2025-09-04 14:49:00 +02:00
669230a86f fix broken offline mode when loading tokenizer from hub (#40669)
* fix broken offline mode when loading tokenizer from hub

* formatting

* make quality

* fix import order
2025-09-04 12:15:56 +00:00
91b34be9cf Add codebook_dim attribute to DacVectorQuantize for DacResidualVectorQuantize.from_latents() (#40665)
* Add instance attribute to DacVectorQuantize for use in DacResidualVectorQuantize.from_latents

* add from_latent tests

* style fix

* Fix style for test_modeling_dac.py
2025-09-04 11:29:53 +00:00
25b4a0d8ae Add sequence classification support for small Gemma 3 text models (#40562)
* add seq class for gemma3 text model

* add Gemma3TextForSequenceClassification to modeling file

* After run make fixup

* let's just check

* this is why it was crashing, tests were just failing...

* skip it, tested only for seq clf

---------

Co-authored-by: Raushan Turganbay <raushan@huggingface.co>
2025-09-04 09:44:59 +00:00
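A hedged sketch of how the new sequence-classification head might be used; the class name comes from the commit above, while the checkpoint id and label count are placeholders.

    from transformers import AutoTokenizer, Gemma3TextForSequenceClassification

    model_id = "google/gemma-3-1b-it"  # placeholder checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = Gemma3TextForSequenceClassification.from_pretrained(model_id, num_labels=2)

    inputs = tokenizer("This library is great.", return_tensors="pt")
    logits = model(**inputs).logits  # shape: (1, num_labels)
    print(logits.argmax(-1))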
30a4b8707d CircleCI docker images cleanup / update / fix (#40681)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 10:42:18 +02:00
7f92e1f91a Mark Aimv2ModelTest::test_eager_matches_sdpa_inference_04_fp16_pad_right_sdpa_kernels as flaky (#40683)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 10:30:14 +02:00
ca9b36a9c1 Avoid night torch CI not run because of irrelevant docker image failing to build (#40677)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 09:06:37 +02:00
d40e7ea52d Skip more fast v.s slow image processor tests (#40675)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-04 06:35:44 +02:00
34595cf296 Even more test data cached (#40636)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 21:20:37 +00:00
f22ec7f174 Benchmarking V2: framework impl (#40486)
* Start revamping benchmarking

* Start refactoring benchmarking

* Use Pandas for CSV

* import fix

* Remove benchmark files

* Remove sample data

* Address review comments

* Benchmarking v2

* Fix llama bench parameters

* Working checkpoint

* Readme touchups

* Remove unnecessary test

* Massage the framework a bit

* Small cleanup

* Remove unnecessary flushes

* Remove references to mock benchmark

* Take commit ID from CLI

* Address review comments

* Use Events for thread comms

* Tiny renaming
2025-09-03 22:26:32 +02:00
459c1fa47a refactor: use tolist instead of list comprehension calling .item() (#40646) 2025-09-03 19:25:29 +02:00
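A tiny before/after sketch of the refactor above: one tolist call replaces a Python-level loop of per-element .item() calls.

    import torch

    t = torch.arange(5)

    # before: one .item() call per element
    values = [x.item() for x in t]

    # after: a single conversion of the whole tensor
    values = t.tolist()
    print(values)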
afd1393df1 Remove overwritten GitModelTest::test_beam_search_generate (#40666)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 18:55:45 +02:00
68b9cbb7f5 Skip test_prompt_lookup_decoding_matches_greedy_search for qwen2_audio (#40664)
* Skip `test_prompt_lookup_decoding_matches_greedy_search` for `qwen2_audio`

* Skip `test_prompt_lookup_decoding_matches_greedy_search` for `qwen2_audio`

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 18:43:35 +02:00
55676d7d4c Fix warning for output_attentions=True (#40597)
* Fix attn_implementation for output_attentions

* remove setting attention, just raise warning

* improve message

* Update src/transformers/utils/generic.py
2025-09-03 16:25:13 +00:00
b67608f587 Skip test_fast_is_faster_than_slow for Owlv2ImageProcessingTest (#40663)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 17:49:10 +02:00
30d66dc3bc Update check_determinism inside test_determinism (#40661)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 17:30:39 +02:00
3f40ebf620 Allow custom args in custom_generate Callables and unify generation args structure (#40586)
* Squashed commit of the following:

commit beb2b5f7a04ea9e12876696db66f3589fbae10c5
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 16:03:25 2025 +0200

    also standardize _get_stopping_criteria

commit 15c25663fa991e0a215a7f3cdcf13a9d3a989faa
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 15:48:38 2025 +0200

    watch super.generate() usages

commit 67dd845be2202d191a54b2872f1cb3f71b74b7d6
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 14:44:32 2025 +0200

    ops

commit 4655dfa28fd59d5dc083a41d8396de042d99858c
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 14:41:36 2025 +0200

    wrong merge

commit 46478143994e7b27d51c972a7881e0fea3cb6e3c
Merge: a72c2c4b2f 8564e210ca
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 14:36:15 2025 +0200

    Merge branch 'main' of github.com:huggingface/transformers into fix-custom-gen-from-function2

commit a72c2c4b2f9c0e09fe6ec7992d4d02bfa279da2a
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 14:04:59 2025 +0200

    ops5

commit e72f91411b961979bb3d271810f57905cee5b577
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 12:06:19 2025 +0200

    ops4

commit 12ca97b1078a42167143e0243036f6ef87d5fdac
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 11:58:59 2025 +0200

    ops3

commit 8cac6c60a318dd381793d4bf1ef3775823f3c95b
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 11:43:03 2025 +0200

    ops2

commit 4681a7d5dc6c8b96a515d9d79f06380c096b9a9f
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 11:40:51 2025 +0200

    ops

commit 0d72aa6cbd99a5933c5a95a39bea9088ee21e50f
Merge: e0d47e980e 5bb6186b8e
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 11:37:28 2025 +0200

    Merge branch 'remove-constrained-bs' into fix-custom-gen-from-function2

commit 5bb6186b8efbd5fdb8e3464a22f958343b9c450c
Merge: 44973dac7d b0db5a02f3
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 11:36:30 2025 +0200

    Merge branch 'main' into remove-constrained-bs

commit 44973dac7df4b4e2111c71f5fac918be21f3de52
Merge: 1ddab4bee1 893d89e5e6
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 11:29:48 2025 +0200

    Merge commit '893d89e5e6fac7279fe4292bfa3b027172287162' into remove-constrained-bs

commit e0d47e980e26d32b028c2b402ccb71262637a7a7
Merge: 88128e4563 1ddab4bee1
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 10:52:50 2025 +0200

    Merge branch 'remove-constrained-bs' into fix-custom-gen-from-function2

commit 88128e4563c0be583728e1d3c639bc93143c4029
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Mon Sep 1 10:44:38 2025 +0200

    fix custom generate args, refactor gen mode args

commit 1ddab4bee159f6c20722e7ff5cd41d5041fab0aa
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Sun Aug 31 21:03:53 2025 +0200

    fix

commit 6095fdda677ef7fbeb06c05f4f914a11b45257b4
Merge: 4a8b6d2ce1 04addbc9ec
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 17:49:16 2025 +0200

    Merge branch 'remove-constrained-bs' of github.com:manueldeprada/transformers into remove-constrained-bs

commit 4a8b6d2ce18b3a8b52c5261fea427e2416f65187
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 17:48:25 2025 +0200

    restore and deprecate beam objects

commit 04addbc9ec62dd4f59d15128e8cd9499e2cda3bb
Merge: e800c7841e becab2c601
Author: Manuel de Prada Corral <6536835+manueldeprada@users.noreply.github.com>
Date:   Thu Aug 28 14:38:29 2025 +0200

    Merge branch 'main' into remove-constrained-bs

commit e800c7841e5c46ce5698fc9be309d0808f85d23c
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 14:38:10 2025 +0200

    tests gone after green

commit 33971d21ac40aef76a7e1122f4a98ef28beadbe8
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 14:07:11 2025 +0200

    tests green, changed handling of deprecated methods

commit ab303835c184d0a87789da7aed7d8de5ba85d867
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 12:58:01 2025 +0200

    tests fix

commit ec74274ca52a6aa0b5f300374fda838609680506
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 12:32:05 2025 +0200

    ops

commit 0fb19004ccd285dcad485fce0865b355ce5493e0
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 11:45:16 2025 +0200

    whoops

commit c946bea5e45aea021c8878c57fcabc2a13f06fe5
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 11:35:36 2025 +0200

    testing...

commit 924c0dec6d9ea6b4890644fe7f711dc778f820bb
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 11:22:46 2025 +0200

    sweeep ready for tests

commit b05aa771d3994b07cd460cda74b274c9e4f315e6
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Thu Aug 28 11:13:01 2025 +0200

    restore and deprecate constraints

commit 9c7962d10efa7178b69d3c99e69663756e1cd979
Merge: fceeb383f9 c17bf304d5
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Wed Aug 27 20:44:21 2025 +0200

    Merge branch 'remove-group-bs' into remove-constrained-bs

commit c17bf304d5cf33af7f34f9f6057915d5f5821dae
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Wed Aug 27 17:00:50 2025 +0200

    fix test

commit d579aeec6706b77fcc24c1f6806cd7277d7db56e
Merge: 822efd8c3c ed5dd2999c
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Wed Aug 27 16:04:31 2025 +0200

    Merge branch 'main' of github.com:huggingface/transformers into remove-group-bs

commit 822efd8c3cf475d079e64293aa06e4ab59740fd7
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Wed Aug 27 15:59:51 2025 +0200

    aaand remove tests after all green!!

commit 62cb274a4acb9f24201902242f1b0dc4e46daac1
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Wed Aug 27 11:48:19 2025 +0200

    fix

commit c89c892e7b24a7d71831f2b35264456005030925
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Wed Aug 27 11:45:20 2025 +0200

    testing that hub works the same

commit fceeb383f99e4a836679d67b1d2a8520152eaf49
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Tue Aug 26 20:06:59 2025 +0200

    draft

commit 6a9b384078f3798587ba865ac7ddfefc9a79e41c
Merge: 8af3af13ab 58cebc848b
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Tue Aug 26 15:00:05 2025 +0200

    Merge branch 'main' of github.com:huggingface/transformers into remove-group-bs

commit 8af3af13abb85ca60e795d0390832f398a56c34f
Author: Manuel de Prada Corral <manueldeprada@gmail.com>
Date:   Tue Aug 26 11:55:45 2025 +0200

    Squashed commit remove-constrastive-search

* ops

* fix

* ops

* review

* fix

* fix dia

* review
2025-09-03 17:30:09 +02:00
a8f400367d Avoid attention_mask copy in qwen2.5 (#40658)
Signed-off-by: cyy <cyyever@outlook.com>
2025-09-03 15:17:22 +00:00
57f5668d0b Fix Metaclip modular conversion (#40660)
* Fix Metaclip modular conversion

* manually run check_copies
2025-09-03 16:13:50 +01:00
238a8274b4 feat(serving): add healthcheck (#40653) 2025-09-03 16:43:12 +02:00
f2416b4fd2 fix pipeline dtype (#40638)
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-03 16:05:48 +02:00
5ea5c8179b Mark LongformerModelTest::test_attention_outputs as flaky (#40655)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 13:19:02 +00:00
fe1a9e0dba Remove TF/Flax examples (#40654)
* Remove TF/Flax examples

* Remove check_full_copies

* Trigger CI
2025-09-03 14:15:57 +01:00
5e2e496149 fix MetaCLIP 2 wrong link & wrong model names in the docstrings (#40565)
* fix MetaCLIP 2 wrong link & wrong model names in the documentation and docstrings

* ruff reformatted

* update files generated by modular

* update meta_clip2 to metaclip_2 to match the original

* _supports_flash_attn = False

---------

Co-authored-by: Yung-Sung Chuang <yungsung@meta.com>
2025-09-03 13:53:56 +01:00
03708ccf6f add DeepseekV3ForTokenClassification (#40641)
* add DeepseekV3ForTokenClassification

* fix typo

---------

Co-authored-by: json.bourne <json.bourne@kakaocorp.com>
2025-09-03 12:30:09 +00:00
c485c52db4 Skip test_prompt_lookup_decoding_matches_greedy_search for voxtral (#40643)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 11:45:29 +00:00
2bbf98a83d Fix: PIL image load in Processing utils apply_chat_template (#40622) 2025-09-03 13:06:05 +02:00
acc968c581 [CP] Add attention_mask to the buffer when the mask is causal (#40619)
Fix attention mask validation for context parallelism

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-09-03 10:19:35 +00:00
cb54ce4ec6 [auto-model] propagate kwargs (#40491)
propagate kwargs
2025-09-03 09:59:20 +00:00
0f5e45a6d1 fix: gas for gemma fixed (#40591)
* fix: gas for gemma fixed

* feat: run fix-copies

* feat: added issue label
2025-09-03 08:44:14 +00:00
e690fe61e8 Fix too many requests in TestMistralCommonTokenizer (#40623)
* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-03 05:05:03 +02:00
00a8364271 🌐 [i18n-KO] Translated deepseek_v3.md to Korean (#39649)
* docs: ko: deepseek_v3.md

* feat: nmt draft

* fix: manual edits

* fix: glossary edits

* docs: apply content changes recommended by 4N3MONE

* Update docs/source/ko/model_doc/deepseek_v3.md

Co-authored-by: Kim Juwon <81630351+Kim-Ju-won@users.noreply.github.com>

* Update docs/source/ko/model_doc/deepseek_v3.md

Co-authored-by: Kim Juwon <81630351+Kim-Ju-won@users.noreply.github.com>

* add_toctree.yml

---------

Co-authored-by: Kim Juwon <81630351+Kim-Ju-won@users.noreply.github.com>
2025-09-02 13:35:56 -07:00
ed49376a42 Remove random flag (#40629)
remove flag
2025-09-02 19:10:02 +02:00
d47ad91c3c Support TF32 flag for MUSA backend (#33187)
* Support MUSA (Moore Threads GPU) backend in transformers
Add accelerate version check, needs accelerate>=0.33.0

* Support TF32 flag for MUSA backend

* fix typo
2025-09-02 16:27:10 +00:00
a470f21396 Enable more ruff UP rules (#40579)
* Import Sequence from collections.abc

Signed-off-by: cyy <cyyever@outlook.com>

* Apply ruff UP rules

Signed-off-by: cyy <cyyever@outlook.com>

---------

Signed-off-by: cyy <cyyever@outlook.com>
2025-09-02 17:29:59 +02:00
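A small before/after sketch of the kind of change these ruff UP rules enforce (builtin generics and collections.abc instead of deprecated typing aliases); illustrative only.

    # before: deprecated typing aliases
    # from typing import Dict, List, Sequence
    # def flatten(batches: Dict[str, List[int]]) -> Sequence[int]: ...

    # after: builtin generics and collections.abc
    from collections.abc import Sequence

    def flatten(batches: dict[str, list[int]]) -> Sequence[int]:
        return [x for values in batches.values() for x in values]

    print(flatten({"a": [1, 2], "b": [3]}))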
37103d6f22 Fix invalid typing (#40612)
Signed-off-by: cyy <cyyever@outlook.com>
2025-09-02 13:10:22 +00:00
4f542052b9 Remove unnecessary pillow version check (#40604)
Signed-off-by: cyy <cyyever@outlook.com>
2025-09-02 12:59:22 +00:00
8c60a7c385 Add collated reports job to Nvidia CI (#40470)
* Add collated reports job to Nvidia CI

* machine_type

* Move collated reports job to model_jobs

* Propagate repo id variable

* assign runner_type if self-scheduled-caller
2025-09-02 14:25:22 +02:00
97266dfd50 Fix flaky JambaModelTest.test_load_balancing_loss (#40617)
* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-02 13:58:16 +02:00
91be12bdc6 Avoid too many request caused by AutoModelTest::test_dynamic_saving_from_local_repo (#40614)
* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-02 12:08:52 +02:00
bbd8085b0b Fix processor chat template (#40613)
fix tests
2025-09-02 10:59:48 +02:00
b2b1c30b1b fix: continuous batching in transformers serve (#40479)
* fix: continuous batching in `transformers serve`

* fix: short circuit inner gen loop when prepare_next_batch prepared nothing

* docs: add comment explaining FastAPI lifespan

* test: add CB serving tests

* refactor: remove gen cfg max new tokens override bc unnecessary

* docs: add docstring for `ServeCommand::run`

* feat: use new `DecodeStream` API
2025-09-02 10:45:05 +02:00
8a091cc07c Disable cache for TokenizerTesterMixin temporarily (#40611)
* try no cache

* try no cache

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-02 08:40:04 +02:00
514b3e81b7 Multiple fixes to FA tests in AMD (#40498)
* Expectations for gemma3

* Fixes for Qwen2_5_VL tests

* Added expectation but underlying pb is still there

* Better handling of mrope section for Qwen2_5_vl

* Fixes for FA2 tests and reformat batch test for Qwen2_5_Omni

* Fix multi-device error in qwen2_5_omni

* Style and repo-consistency

* Removed inherited test because fix in common

* slow tests fixes

* Style

* Fixes for qwen2_5_vl or omni for FA test
2025-09-01 20:49:50 +02:00
b3655507bb Pin torchcodec to 0.5 in AMD docker (#40598) 2025-09-01 20:39:55 +02:00
4da03d7f57 Reduce more test data fetch (#40595)
* example

* fix

* fix

* add to fetch script

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-01 18:07:18 +02:00
abf5900a76 [Tests] Fixup duplicated mrope logic (#40592)
cleanup duplicated logic
2025-09-01 17:22:34 +02:00
3beac9c659 Fix quite a lot of FA tests (#40548)
* fix_rope_change

* fix

* do it dynamically

* style

* simplify a lot

* better fix

* fix

* fix

* fix

* fix

* style

* fix
2025-09-01 16:42:50 +02:00
21e708c8fd Fix for missing default values in encoder decoder (#40517)
* Added default_value for is_updated and type check

* Forgot one

* Repo consistency
2025-09-01 16:11:23 +02:00
c99d43e6ec Fix siglip flaky test_eager_matches_sdpa_inference (#40584)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-01 15:17:25 +02:00
3c3dac3c12 Add Copilot instructions (#40432)
* Add copilot-instructions.md

* Fix typo

* Update .github/copilot-instructions.md

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

---------

Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
2025-09-01 14:09:54 +01:00
2b71c5b7a6 Fix inexistent imports (#40580)
Signed-off-by: cyy <cyyever@outlook.com>
2025-09-01 13:05:00 +00:00
8e0b2c8baf Skip TvpImageProcessingTest::test_slow_fast_equivalence (#40593)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-01 15:03:34 +02:00
a543095c99 Fix typos (#40585)
Signed-off-by: cyy <cyyever@outlook.com>
2025-09-01 12:58:23 +00:00
8564e210ca 🚨 Remove Constrained Beam Search decoding strategy (#40518)
* Squashed remove-constrastive-search

* sweeep ready for tests

* testing...

* whoops

* ops

* tests fix

* tests green, changed handling of deprecated methods

* tests gone after green

* restore and deprecate beam objects

* restore and deprecate constraint objects

* fix ci

* review
2025-09-01 12:34:48 +00:00
564be6d895 Support batch size > 1 image-text inference (#36682)
* update make nested image list

* fix make flat list of images

* update type anno

* fix image_processing_smolvlm

* use first image

* add verbose comment

* fix images

* rollback

* fix ut

* Update image_processing_smolvlm.py

* Update image_processing_idefics3.py

* add tests and fix some processors

* fix copies

* fix after rebase

* make the test cover chat templates

* skip udop, no point in fixing it

* fix after rebase

* fix a few more tests

---------

Co-authored-by: Pavel Iakubovskii <qubvel@gmail.com>
Co-authored-by: raushan <raushan@huggingface.co>
2025-09-01 12:26:07 +00:00
3bccb02616 🚨 Remove Group Beam Search decoding strategy (#40495)
* Squashed remove-constrastive-search

* testing that tests pass using hub

* fix

* aaand remove tests after all green!!
2025-09-01 13:42:48 +02:00
90953d5bc1 Fix custom generate relative imports (#40480) 2025-09-01 13:38:56 +02:00
2537ed4477 Update get_*_features methods + update doc snippets (#40555)
* siglip

* clip

* aimv2

* metaclip_2

* align

* align fixup

* altclip

* blip2 (make consistent)

* chineese clip

* clipseg

* flava

* groupvit

* owlv2

* owlvit

* vision_encoder

* clap

* x_clip

* fixup

* fix siglip2

* blip2

* fix blip2 tests (revert to original)

* fix docs
2025-09-01 12:37:43 +01:00
48ebae975e Fix llava image processor (#40588)
fix
2025-09-01 13:32:57 +02:00
db6821b79c Allow remi-or to run-slow (#40590)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-01 12:30:53 +02:00
6546f288a1 Fix CircleCI step passes in the case of pytest worker crash at test collection time (#40552)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-01 11:33:23 +02:00
cfed99d310 Fix test_eager_matches_sdpa_inference not run for CLIP (#40581)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-09-01 11:21:56 +02:00
1d742644c0 [qwen-vl] fix position ids (#40490)
* fix position ids

* fixup

* adjust tests since they are failing on main as well

* add a comment to make it clear
2025-09-01 09:10:41 +00:00
0b24507379 processor tests - use dummy videos (#40537)
* use dummy videos

* failing on main, new model merged had conflicts
2025-09-01 09:04:47 +00:00
b0db5a02f3 Set test_all_params_have_gradient=False for DeepseekV2ModelTest (#40566)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-30 22:46:31 +02:00
1363fceeec remove the redundant, unmaintained jieba and use rjieba instead (#40383)
* porting the unmaintained jieba to rjieba

* Fix format

* replaced the line with rjieba instead of removing it

* cut_all is not included as a parameter; cut_all is a separate function in rjieba

* rev

* jieba remove installation

* Trigger tests

* Update tokenization_cpm.py

* Update tokenization_cpm_fast.py

---------

Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-08-30 13:28:52 +02:00
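A small sketch of the drop-in switch described above, assuming the rjieba package is installed; the sample sentence is arbitrary and the exact keyword arguments may differ from the version used in the PR.

    import rjieba

    text = "我来到北京清华大学"
    # rjieba.cut returns the segmented tokens; unlike jieba, full-segmentation mode is a separate function
    tokens = rjieba.cut(text)
    print(list(tokens))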
36fddebcee pin pytest-rerunfailures<16.0 (#40561)
pin pytest-rerunfailures<16.0

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-30 12:58:44 +02:00
2d3b8863e8 Fix collated reports upload filename (#40556) 2025-08-30 09:35:51 +02:00
ce48e9cac0 Dev version 2025-08-29 20:17:34 +02:00
155fd926d2 Fix GptOssModelTest::test_assisted_decoding_matches_greedy_search_1_same (#40551)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Co-authored-by: Manuel de Prada Corral <6536835+manueldeprada@users.noreply.github.com>
2025-08-29 15:53:53 +00:00
1067577ad2 fix gpt-oss out shape (#40535)
* fix out shape

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* reset gpt-oss modeling

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix copies

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-08-29 15:20:33 +00:00
7efb4c87ca Flaky CI is annoying (#40543)
* mark flaky

* and the non batch one
2025-08-29 16:47:44 +02:00
828a27fd32 Fix gpt-oss rope warning (#40550)
* fix

* fix print

* rm

* real fix

* fix

* style
2025-08-29 14:40:33 +00:00
74a24217f5 Add bfloat16 support detection for MPS in is_torch_bf16_gpu_available() (#40458)
* Add bfloat16 support detection for MPS (Apple Silicon) in is_torch_bf16_gpu_available

bfloat16 seems to have been supported for a few years now in Metal and torch.mps.

Make sure to allow it and not throw on bf16 usage with "Your setup doesn't support bf16/gpu." from TrainingArguments.

* Check bf16 support for MPS using torch method

Actually it seems the method exists: 5859edf113/torch/_dynamo/device_interface.py (L519)

It simply checks if you are on macOS 14 or higher.

* Document Metal emulation for bf16 support

Add note about Metal emulation for bf16 support on M1/M2.

* Update bf16 support check for MPS backend

is_bf16_supported() not exposed even if defined on MPSInterface, use same approach as in accelerate pr.

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
2025-08-29 14:37:15 +00:00
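A hedged sketch of the kind of check described in the commit above (MPS available plus macOS 14 or newer); this mirrors the idea, not the exact code that was merged.

    import platform

    import torch

    def mps_supports_bf16() -> bool:
        # assumption: bf16 on MPS is treated as supported from macOS 14 (Sonoma) onward
        if not (torch.backends.mps.is_available() and torch.backends.mps.is_built()):
            return False
        major = int(platform.mac_ver()[0].split(".")[0] or 0)
        return major >= 14

    print(mps_supports_bf16())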
ffdd10fced Allow compression on meta device (#39039)
* disable gradient calculation for int weights

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>

* Update src/transformers/quantizers/quantizer_compressed_tensors.py

Co-authored-by: Kyle Sayers <kylesayrs@gmail.com>

* updated model procession before/after weight loading

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>

* fix style

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>

* reformat

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>

* fix style

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>

---------

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>
Co-authored-by: Kyle Sayers <kylesayrs@gmail.com>
2025-08-29 15:49:15 +02:00
f0e778112f Clean-up kernel loading and dispatch (#40542)
* clean

* clean imports

* fix imports

* oups

* more imports

* more imports

* more

* move it to integrations

* fix

* style

* fix doc
2025-08-29 14:14:38 +02:00
f68eb5f135 Redundant code removal (#40534)
redundant code
2025-08-29 11:30:23 +00:00
d888bd435d Fix typos (#40511)
Signed-off-by: cyy <cyyever@outlook.com>
2025-08-29 11:25:33 +00:00
11a6b95553 Oupsy (#40544)
fix bump!
2025-08-29 12:59:49 +02:00
b07144ac27 tokenizers bump tokenizers version (#40540)
* bump tokenizers version

* use rc0

* ?

* fml

* update
2025-08-29 12:34:41 +02:00
008c0ba8e2 Fix SeamlessM4Tv2ModelWithTextInputTest::test_retain_grad_hidden_states_attentions (#40532)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-28 23:30:59 +02:00
89ef1b6e0b Set test_all_params_have_gradient=False for HunYuanMoEV1ModelTest (#40530)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-28 22:32:51 +02:00
2e0f1d6a37 [Qwen Omni/VL] Fix fa tests (#40528)
* fix

* style

* flaky flaky

* flaky flaky

* oopsie, we need the out of place for sure

* flaky flaky

* flaky flaky
2025-08-28 21:07:22 +02:00
68013c505a Improve Gemma3n model and tests (#39764) 2025-08-28 20:25:42 +02:00
ffcb344612 Lazy import torchcodec (#40526)
* lazy import

* parse version

* omg, we need to guard version parse as well
2025-08-28 18:57:14 +02:00
8c7f685079 Fix typo: 'casual' to 'causal' (#40374)
fix typo: 'casual' to 'causal'

Co-authored-by: demo <vamshika0210@gamil.com>
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
2025-08-28 09:17:37 -07:00
d61fab1549 skip some padding_matches_padding_free_with_position_ids for FA2 (#40521)
skip 1

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-28 17:20:07 +02:00
31336ab750 Fix mistral3 tests after "[Kosmos 2.5] Rename checkpoints" (#40523)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-28 16:29:54 +02:00
851b8f281d [kernels] If flash attention2 is not installed / fails to import (cc on our cluster) default to kernels (#40178)
* first step if flash not installed but you set to use it

* try importing

* now default to using it

* update our tests as well

* wow yesterday I was not awake

* fixup

* style

* lol the fix was very very simple

* `RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels
` for updated dockers

* push review comments

* fix

---------

Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
2025-08-28 16:20:25 +02:00
de9e2d7a2e Skip some flex attn tests (#40519)
fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-28 15:43:38 +02:00
7e1aee4db6 [FA] Remaining Cleanup (#40424)
* fa cleanup

* flaky tests

* readd removed test and changeup comments to reflect the purpose

* flaky tests
2025-08-28 15:01:19 +02:00
893d89e5e6 [omni modality] support composite processor config (#38142)
* dump ugly option to check again tomorrow

* tiny update

* do not save as nested dict yet!

* fix and add tests

* fix dia audio tokenizers

* rename the flag and fix new model Evolla

* fix style

* address comments

* broken from a different PR

* fix saving layoutLM

* delete print

* delete!
2025-08-28 14:40:27 +02:00
becab2c601 Use the config for DynamicCache initialization in all modelings (#40420)
* update all

* remove the most horrible old code

* style
2025-08-28 14:32:30 +02:00
8acbbdcadf [serve] fix request_id unexpected (#40501)
* fix request-id in serving

* style

* fix
2025-08-28 14:16:28 +02:00
2300be3b41 sped up gguf tokenizer for nemotron test (#40509)
sped up tokenizer for nemotron test
2025-08-28 12:10:49 +00:00
b2b654afbf correct kes to keys. (#40489)
Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
2025-08-28 12:00:22 +00:00
476cd7bab1 [vision] Improve keypoint-matching models docs (#40497)
fix options and add inference_mode
2025-08-28 12:31:21 +01:00
1499f9e356 [Kosmos 2.5] Rename checkpoints (#40338) 2025-08-28 13:30:41 +02:00
10ddfb0be5 Add more missing arguments (#40354)
Add missing arguments

Signed-off-by: cyy <cyyever@outlook.com>
2025-08-28 12:21:51 +02:00
d10603f701 Add Apertus (#39381)
* init swissai model

* AutoModelForCausalLM

* AutoModelForCausalLM mapping

* qk norm and post ln optional

* fix wrong shape of qk norm: megatron uses head_dim

* automodel fixes

* minor fix in forward

* fix rope validation to accept llama3 scaling

* `SwissAIForTokenClassification` support

* Align `SwissAI` to v4.52.4

* Align `SwissAI` to v4.53.1

* Init CUDA xIELU

* `SwissAI*`->`Apertus*`

* ci fix

* check_docstring ignore ApertusConfig

* Licensing and placeholder tests

* Placeholder doc

* XIELU syntax

* `_xielu_python` optimization

* Fix xIELU

* [tmp] `{beta,eps}` persistent=False
until {beta,eps} saved in checkpoint

* Modular `Apertus`

* CUDA xIELU logging

* ci fix

* ci fix

* ci fix

* Update license

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>

* Update tests/models/apertus/test_modeling_apertus.py

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>

* `.utils.import_utils.is_torchdynamo_compiling`

* `Apertus` class ordering

* `past_key_value{->s}`, `make fix-copies`

* ci fix

* Remove unused configuration parameters

* `{beta,eps}` saved in checkpoint

* `{beta,eps}` Temporarily on CPU

* Suggestions

Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>

* ci fix

* remove fx_compatible (deprecated)

* remove `rotary_embedding_layer`

As the tests are written for a config without default scaling (which is not the case in Apertus) - besides, rope scaling is tested in other models so it's all safe.

* fully removing `Mask4DTestHard` class

Not needed (for now)

* switch to `dtype` instead of `torch_dtype`

Following this:
https://github.com/huggingface/transformers/pull/39782

* remove unused imports

* remove `cache_implementation="static"`

* +Apertus to `docs/source/en/_toctree.yml` for the doc builder

---------

Co-authored-by: Alexander Hagele <alexanderhagele@gmail.com>
Co-authored-by: dhia680 <garbayad@gmail.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
Co-authored-by: Dhia Garbaya <84809366+dhia680@users.noreply.github.com>
2025-08-28 11:55:43 +02:00
f9b9a5e884 Update quantization overview for XPU (#40331)
* update xpu quantization overview

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix aqlm tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix format

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update gguf support

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix gguf tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix xpu gguf precision error

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* replace deprecated models

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix import org

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* update xpu ggml tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* revert wrong change

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix xpu tests

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* xpu optimum-quanto goes green

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix format

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
2025-08-28 09:52:59 +00:00
b824f4986f fix typo (#40484)
* fix typo

Signed-off-by: guochenxu <guochenxu@modelbest.cn>

* csm & qwen omni

Signed-off-by: guochenxu <guochenxu@modelbest.cn>

* format

Signed-off-by: guochenxu <guochenxu@modelbest.cn>

* Apply style fixes

* omni

Signed-off-by: guochenxu <guochenxu@modelbest.cn>

---------

Signed-off-by: guochenxu <guochenxu@modelbest.cn>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2025-08-28 08:31:25 +00:00
c9ff166718 Various AMD expectations (#40510)
* AMD expectations for qwen2

* Added more detailed expectations to smolvlm

* Added AMD expectations to TableTransformer

* Style
2025-08-28 10:15:21 +02:00
721d4aee81 Include machine type in collated reports filename (#40514) 2025-08-28 09:28:12 +02:00
98289c5546 [modular] Classes can now be defined and referenced in arbitrary order (without bringing unwanted dependencies) (#40507)
* remove future class from dependency graph

* convert all
2025-08-27 23:06:10 +02:00
e3d8fd730e docs(pixtral): Update Pixtral model card to new format (#40442)
* docs(pixtral): Update Pixtral model card to new format

* docs(pixtral): Change cuda into auto for device_map

* docs(pixtral): Apply suggestions from review

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* docs(pixtral): Apply suggestions from review, changing mistral-community into Mistral AI

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* docs(pixtral): Apply suggestions from review [!TIP] part

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* docs(pixtral): Finalize model card with tested code examples

This commit finalizes the update for the Pixtral model card.

* Fix the hfoption by the right one

* @BryanBradfo docs(pixtral): Changing the redirection of bitsandbytes

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* docs(pixtral): Add of ` to highlight the tokens

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* docs(pixtral): Move image block per final review

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-08-27 11:38:51 -07:00
821384d5d4 Fix the CI workflow of merge to main (#40503)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-27 18:35:12 +02:00
304225aa15 Collated reports: no need to upload artifact (#40502)
No need to upload collated reports as gh artifact
2025-08-27 18:31:55 +02:00
3c343c6601 [Whisper] Add rocm expected results to certain tests (#40482)
* Add rocm expected results to certain tests

* Specify rocm version in expectations so we know origin. Improved var names

* Update test var names
2025-08-27 16:19:11 +00:00
6350636964 Fix qwen2_moe tests (#40494)
update

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-27 16:22:04 +02:00
52aaa3f500 [EfficientLoFTR] dynamic image size support (#40329)
* fix: reverted efficientloftr embeddings computation to inference time with lru cache

* fix: added dtype and device for torch ones and zeros creation

* fix: fixed embed height and width computation with aggregation

* fix: make style

* fix error message

* fix fa2 tests

---------

Co-authored-by: qubvel <qubvel@gmail.com>
2025-08-27 15:05:08 +01:00
ed5dd2999c [ESM] support attention API (#40370)
* ESM supports attention API

* supports flags

* fix tests

* fix copies

* another fixup needed after fixing tests

* fix tests and make sure Evolla copied everything

* fix

* order

* forgot about "is_causal" for fa2

* cross attention can't be causal
2025-08-27 15:39:04 +02:00
8b804311ba [modular] Remove ambiguity in all calls to parent class methods + fix dependency graph (#40456)
* fix in modular

* remove leftover print

* fix everything except when it's in assignment

* fix assignment as well

* more general

* better

* better

* better comment

* docstring

* cleaner

* remove base

* doc
2025-08-27 14:51:28 +02:00
a3afebbbbe [modular] Use multi-processing + fix model import issue (#40481)
* add mp and simplify a bit

* improve

* fix

* fix imports

* nit
2025-08-27 14:51:12 +02:00
75d6f17de6 Validate GptOssConfig rope config after it's fully initialized (#40474)
* Validate GptOssConfig rope config after it's fully initialized

Fixes #40461

* Remove whitespaces
2025-08-27 10:16:58 +01:00
80f4c0c6a0 CI when PR merged to main (#40451)
* up

* up

* up

* up

* up

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-27 10:56:18 +02:00
ff8b88a948 Fix nightly torch CI (#40469)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2025-08-26 22:02:15 +02:00
74ad608a2b Not to shock AMD team by the cancelled workflow run notification ❤️ 💖 (#40467) 2025-08-26 20:53:24 +02:00
c8c7623f20 Update SegFormer model card (#40417)
* Update SegFormer model card

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update docs/source/en/model_doc/segformer.md

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

* Update the segformer model card

* Remove quantization example

---------

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
2025-08-26 08:27:25 -07:00
3569 changed files with 138567 additions and 321042 deletions

View File

@ -16,10 +16,9 @@
import argparse
import copy
import os
import random
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import glob
from typing import Any, Optional
import yaml
@ -30,6 +29,7 @@ COMMON_ENV_VARIABLES = {
"RUN_PIPELINE_TESTS": False,
# will be adjust in `CircleCIJob.to_dict`.
"RUN_FLAKY": True,
"DISABLE_SAFETENSORS_CONVERSION": True,
}
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None}
@ -82,15 +82,15 @@ class EmptyJob:
@dataclass
class CircleCIJob:
name: str
additional_env: Dict[str, Any] = None
docker_image: List[Dict[str, str]] = None
install_steps: List[str] = None
additional_env: dict[str, Any] = None
docker_image: list[dict[str, str]] = None
install_steps: list[str] = None
marker: Optional[str] = None
parallelism: Optional[int] = 0
pytest_num_workers: int = 8
pytest_options: Dict[str, Any] = None
pytest_options: dict[str, Any] = None
resource_class: Optional[str] = "xlarge"
tests_to_run: Optional[List[str]] = None
tests_to_run: Optional[list[str]] = None
num_test_files_per_worker: Optional[int] = 10
# This should be only used for doctest job!
command_timeout: Optional[int] = None
@ -130,6 +130,12 @@ class CircleCIJob:
def to_dict(self):
env = COMMON_ENV_VARIABLES.copy()
if self.job_name != "tests_hub":
# fmt: off
# not critical
env.update({"HF_TOKEN": "".join(["h", "f", "_", "H", "o", "d", "V", "u", "M", "q", "b", "R", "m", "t", "b", "z", "F", "Q", "O", "Q", "A", "J", "G", "D", "l", "V", "Q", "r", "R", "N", "w", "D", "M", "V", "C", "s", "d"])})
# fmt: on
# Do not run tests decorated by @is_flaky on pull requests
env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
env.update(self.additional_env)
@ -149,7 +155,7 @@ class CircleCIJob:
# Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues
timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
junit_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
@ -177,14 +183,33 @@ class CircleCIJob:
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
}
},
{"run": {"name": "fetch hub objects before pytest", "command": "python3 utils/fetch_hub_objects_for_ci.py"}},
# During the CircleCI docker images build time, we might already (or not) download the data.
# If it's done already, the files are inside the directory `/test_data/`.
{"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}},
{"run": {"name": "download and unzip hub cache", "command": 'curl -L -o huggingface-cache.tar.gz https://huggingface.co/datasets/hf-internal-testing/hf_hub_cache/resolve/main/huggingface-cache.tar.gz && apt-get install pigz && tar --use-compress-program="pigz -d -p 8" -xf huggingface-cache.tar.gz && mv -n hub/* /root/.cache/huggingface/hub/ && ls -la /root/.cache/huggingface/hub/'}},
{"run": {
"name": "Run tests",
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
},
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
{"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
{"run":
{
"name": "Check for test crashes",
"when": "always",
"command": """if [ ! -f tests_output.txt ]; then
echo "ERROR: tests_output.txt does not exist - tests may not have run properly"
exit 1
elif grep -q "crashed and worker restarting disabled" tests_output.txt; then
echo "ERROR: Worker crash detected in test output"
echo "Found: crashed and worker restarting disabled"
exit 1
else
echo "Tests output file exists and no worker crashes detected"
fi"""
},
},
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
{"run": {"name": "Errors", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
{"store_test_results": {"path": "test-results"}},
{"store_artifacts": {"path": "test-results/junit.xml"}},
{"store_artifacts": {"path": "reports"}},
@ -246,7 +271,6 @@ custom_tokenizers_job = CircleCIJob(
docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}],
)
examples_torch_job = CircleCIJob(
"examples_torch",
additional_env={"OMP_NUM_THREADS": 8},
@ -270,19 +294,6 @@ hub_job = CircleCIJob(
resource_class="medium",
)
onnx_job = CircleCIJob(
"onnx",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
install_steps=[
"uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
],
pytest_options={"k onnx": None},
pytest_num_workers=1,
resource_class="small",
)
exotic_models_job = CircleCIJob(
"exotic_models",
docker_image=[{"image":"huggingface/transformers-exotic-models"}],
@ -290,7 +301,6 @@ exotic_models_job = CircleCIJob(
pytest_options={"durations": 100},
)
repo_utils_job = CircleCIJob(
"repo_utils",
docker_image=[{"image":"huggingface/transformers-consistency"}],
@ -298,7 +308,6 @@ repo_utils_job = CircleCIJob(
resource_class="large",
)
non_model_job = CircleCIJob(
"non_model",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
@ -334,7 +343,7 @@ doc_test_job = CircleCIJob(
pytest_num_workers=1,
)
REGULAR_TESTS = [torch_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job]
PIPELINE_TESTS = [pipelines_torch_job]
REPO_UTIL_TESTS = [repo_utils_job]

View File

@ -1,5 +1,6 @@
import re
import argparse
import re
def parse_pytest_output(file_path):
skipped_tests = {}

View File

@ -36,26 +36,31 @@ body:
Models:
- text models: @ArthurZucker
- vision models: @amyeroberts, @qubvel
- speech models: @eustlb
- text models: @ArthurZucker @Cyrilvallez
- vision models: @yonigozlan @molbap
- audio models: @eustlb @ebezzam @vasqu
- multimodal models: @zucchini-nlp
- graph models: @clefourrier
Library:
- flax: @gante and @Rocketknight1
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- continuous batching: @remi-or @ArthurZucker @McPatate
- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker and @itazap
- trainer: @zach-huggingface @SunMarc
- trainer: @SunMarc
- attention: @vasqu @ArthurZucker @CyrilVallez
- model loading (from pretrained, etc): @CyrilVallez
- distributed: @3outeille @ArthurZucker
- CIs: @ydshieh
Integrations:
- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
- quantization: @SunMarc @MekkCyber
- kernels: @MekkCyber @drbh
- peft: @BenjaminBossan @githubnemo
Devices/Backends:
@ -69,19 +74,6 @@ body:
- for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
HF projects:
- accelerate: [different repo](https://github.com/huggingface/accelerate)
- datasets: [different repo](https://github.com/huggingface/datasets)
- diffusers: [different repo](https://github.com/huggingface/diffusers)
- rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
Maintained examples (not research project or legacy):
- Flax: @Rocketknight1
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
- TensorFlow: @Rocketknight1
Research projects are not maintained and should be taken as is.
placeholder: "@Username ..."

View File

@ -39,41 +39,40 @@ members/contributors who may be interested in your PR.
Models:
- text models: @ArthurZucker
- vision models: @amyeroberts, @qubvel
- speech models: @eustlb
- text models: @ArthurZucker @Cyrilvallez
- vision models: @yonigozlan @molbap
- audio models: @eustlb @ebezzam @vasqu
- multimodal models: @zucchini-nlp
- graph models: @clefourrier
Library:
- flax: @gante and @Rocketknight1
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
- continuous batching: @remi-or @ArthurZucker @McPatate
- pipelines: @Rocketknight1
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @zach-huggingface, @SunMarc and @qgallouedec
- chat templates: @Rocketknight1
- tokenizers: @ArthurZucker and @itazap
- trainer: @SunMarc
- attention: @vasqu @ArthurZucker @CyrilVallez
- model loading (from pretrained, etc): @CyrilVallez
- distributed: @3outeille @ArthurZucker
- CIs: @ydshieh
Integrations:
- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
- quantization: @SunMarc @MekkCyber
- kernels: @MekkCyber @drbh
- peft: @BenjaminBossan @githubnemo
Devices/Backends:
- AMD ROCm: @ivarflakstad
- Intel XPU: @IlyasMoutawwakil
- Ascend NPU: @ivarflakstad
Documentation: @stevhliu
HF projects:
- accelerate: [different repo](https://github.com/huggingface/accelerate)
- datasets: [different repo](https://github.com/huggingface/datasets)
- diffusers: [different repo](https://github.com/huggingface/diffusers)
- rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
Maintained examples (not research project or legacy):
- Flax: @Rocketknight1
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
- TensorFlow: @Rocketknight1
Research projects are not maintained and should be taken as is.
-->

39
.github/copilot-instructions.md vendored Normal file
View File

@ -0,0 +1,39 @@
# copilot-instructions.md Guide for Hugging Face Transformers
This copilot-instructions.md file provides guidance for code agents working with this codebase.
## Core Project Structure
- `/src/transformers`: This contains the core source code for the library
- `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
- `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
## Coding Conventions for Hugging Face Transformers
- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
## Copying and inheritance
Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
We use two mechanisms to keep this code in sync, both illustrated in the sketch after this list:
- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
When adding new models, you should prefer `modular` style and inherit as many classes as possible from existing models.
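For illustration, here is roughly what each mechanism can look like in practice (the `MyModel` names below are placeholders, and the modular part is a simplified sketch rather than a complete model definition):
import torch
from transformers.models.llama.modeling_llama import LlamaMLP
# Copied from transformers.models.llama.modeling_llama.rotate_half
def rotate_half(x):
    """Rotates half the hidden dims of the input."""
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x1, x2), dim=-1)
# A minimal modular file (e.g. modular_mymodel.py): `make fixup` expands it into modeling_mymodel.py
class MyModelMLP(LlamaMLP):
    pass  # inherits the Llama implementation unchanged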
## Testing
After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`.
If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
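For example, with `bert` as a stand-in model name, the same tests can also be launched from Python via pytest's API:
import pytest
# equivalent to running `pytest tests/models/bert/test_modeling_bert.py` from the repository root
exit_code = pytest.main(["-rsfE", "tests/models/bert/test_modeling_bert.py"])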

View File

@ -13,14 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import github
import json
from github import Github
import os
import re
from collections import Counter
from pathlib import Path
import github
from github import Github
def pattern_to_regex(pattern):
if pattern.startswith("/"):
start_anchor = True

View File

@ -7,8 +7,8 @@ docs/ @stevhliu
/docker/ @ydshieh @ArthurZucker
# More high-level globs catch cases when specific rules later don't apply
/src/transformers/models/*/processing* @molbap @yonigozlan @qubvel
/src/transformers/models/*/image_processing* @qubvel
/src/transformers/models/*/processing* @molbap @yonigozlan
/src/transformers/models/*/image_processing* @yonigozlan
/src/transformers/models/*/image_processing_*_fast* @yonigozlan
# Owners of subsections of the library
@ -186,65 +186,65 @@ trainer_utils.py @zach-huggingface @SunMarc
/src/transformers/models/zamba/mod*_zamba* @ArthurZucker
# Vision models
/src/transformers/models/beit/mod*_beit* @amyeroberts @qubvel
/src/transformers/models/bit/mod*_bit* @amyeroberts @qubvel
/src/transformers/models/conditional_detr/mod*_conditional_detr* @amyeroberts @qubvel
/src/transformers/models/convnext/mod*_convnext* @amyeroberts @qubvel
/src/transformers/models/convnextv2/mod*_convnextv2* @amyeroberts @qubvel
/src/transformers/models/cvt/mod*_cvt* @amyeroberts @qubvel
/src/transformers/models/deformable_detr/mod*_deformable_detr* @amyeroberts @qubvel
/src/transformers/models/deit/mod*_deit* @amyeroberts @qubvel
/src/transformers/models/depth_anything/mod*_depth_anything* @amyeroberts @qubvel
/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @amyeroberts @qubvel
/src/transformers/models/deta/mod*_deta* @amyeroberts @qubvel
/src/transformers/models/detr/mod*_detr* @amyeroberts @qubvel
/src/transformers/models/dinat/mod*_dinat* @amyeroberts @qubvel
/src/transformers/models/dinov2/mod*_dinov2* @amyeroberts @qubvel
/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @amyeroberts @qubvel
/src/transformers/models/dit/mod*_dit* @amyeroberts @qubvel
/src/transformers/models/dpt/mod*_dpt* @amyeroberts @qubvel
/src/transformers/models/efficientformer/mod*_efficientformer* @amyeroberts @qubvel
/src/transformers/models/efficientnet/mod*_efficientnet* @amyeroberts @qubvel
/src/transformers/models/focalnet/mod*_focalnet* @amyeroberts @qubvel
/src/transformers/models/glpn/mod*_glpn* @amyeroberts @qubvel
/src/transformers/models/hiera/mod*_hiera* @amyeroberts @qubvel
/src/transformers/models/ijepa/mod*_ijepa* @amyeroberts @qubvel
/src/transformers/models/imagegpt/mod*_imagegpt* @amyeroberts @qubvel
/src/transformers/models/levit/mod*_levit* @amyeroberts @qubvel
/src/transformers/models/mask2former/mod*_mask2former* @amyeroberts @qubvel
/src/transformers/models/maskformer/mod*_maskformer* @amyeroberts @qubvel
/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @amyeroberts @qubvel
/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @amyeroberts @qubvel
/src/transformers/models/mobilevit/mod*_mobilevit* @amyeroberts @qubvel
/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @amyeroberts @qubvel
/src/transformers/models/nat/mod*_nat* @amyeroberts @qubvel
/src/transformers/models/poolformer/mod*_poolformer* @amyeroberts @qubvel
/src/transformers/models/pvt/mod*_pvt* @amyeroberts @qubvel
/src/transformers/models/pvt_v2/mod*_pvt_v2* @amyeroberts @qubvel
/src/transformers/models/regnet/mod*_regnet* @amyeroberts @qubvel
/src/transformers/models/resnet/mod*_resnet* @amyeroberts @qubvel
/src/transformers/models/rt_detr/mod*_rt_detr* @amyeroberts @qubvel
/src/transformers/models/segformer/mod*_segformer* @amyeroberts @qubvel
/src/transformers/models/seggpt/mod*_seggpt* @amyeroberts @qubvel
/src/transformers/models/superpoint/mod*_superpoint* @amyeroberts @qubvel
/src/transformers/models/swiftformer/mod*_swiftformer* @amyeroberts @qubvel
/src/transformers/models/swin/mod*_swin* @amyeroberts @qubvel
/src/transformers/models/swinv2/mod*_swinv2* @amyeroberts @qubvel
/src/transformers/models/swin2sr/mod*_swin2sr* @amyeroberts @qubvel
/src/transformers/models/table_transformer/mod*_table_transformer* @amyeroberts @qubvel
/src/transformers/models/textnet/mod*_textnet* @amyeroberts @qubvel
/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @amyeroberts @qubvel
/src/transformers/models/upernet/mod*_upernet* @amyeroberts @qubvel
/src/transformers/models/van/mod*_van* @amyeroberts @qubvel
/src/transformers/models/vit/mod*_vit* @amyeroberts @qubvel
/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @amyeroberts @qubvel
/src/transformers/models/vitdet/mod*_vitdet* @amyeroberts @qubvel
/src/transformers/models/vit_mae/mod*_vit_mae* @amyeroberts @qubvel
/src/transformers/models/vitmatte/mod*_vitmatte* @amyeroberts @qubvel
/src/transformers/models/vit_msn/mod*_vit_msn* @amyeroberts @qubvel
/src/transformers/models/vitpose/mod*_vitpose* @amyeroberts @qubvel
/src/transformers/models/yolos/mod*_yolos* @amyeroberts @qubvel
/src/transformers/models/zoedepth/mod*_zoedepth* @amyeroberts @qubvel
/src/transformers/models/beit/mod*_beit* @yonigozlan @molbap
/src/transformers/models/bit/mod*_bit* @yonigozlan @molbap
/src/transformers/models/conditional_detr/mod*_conditional_detr* @yonigozlan @molbap
/src/transformers/models/convnext/mod*_convnext* @yonigozlan @molbap
/src/transformers/models/convnextv2/mod*_convnextv2* @yonigozlan @molbap
/src/transformers/models/cvt/mod*_cvt* @yonigozlan @molbap
/src/transformers/models/deformable_detr/mod*_deformable_detr* @yonigozlan @molbap
/src/transformers/models/deit/mod*_deit* @yonigozlan @molbap
/src/transformers/models/depth_anything/mod*_depth_anything* @yonigozlan @molbap
/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @yonigozlan @molbap
/src/transformers/models/deta/mod*_deta* @yonigozlan @molbap
/src/transformers/models/detr/mod*_detr* @yonigozlan @molbap
/src/transformers/models/dinat/mod*_dinat* @yonigozlan @molbap
/src/transformers/models/dinov2/mod*_dinov2* @yonigozlan @molbap
/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @yonigozlan @molbap
/src/transformers/models/dit/mod*_dit* @yonigozlan @molbap
/src/transformers/models/dpt/mod*_dpt* @yonigozlan @molbap
/src/transformers/models/efficientformer/mod*_efficientformer* @yonigozlan @molbap
/src/transformers/models/efficientnet/mod*_efficientnet* @yonigozlan @molbap
/src/transformers/models/focalnet/mod*_focalnet* @yonigozlan @molbap
/src/transformers/models/glpn/mod*_glpn* @yonigozlan @molbap
/src/transformers/models/hiera/mod*_hiera* @yonigozlan @molbap
/src/transformers/models/ijepa/mod*_ijepa* @yonigozlan @molbap
/src/transformers/models/imagegpt/mod*_imagegpt* @yonigozlan @molbap
/src/transformers/models/levit/mod*_levit* @yonigozlan @molbap
/src/transformers/models/mask2former/mod*_mask2former* @yonigozlan @molbap
/src/transformers/models/maskformer/mod*_maskformer* @yonigozlan @molbap
/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @yonigozlan @molbap
/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @yonigozlan @molbap
/src/transformers/models/mobilevit/mod*_mobilevit* @yonigozlan @molbap
/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @yonigozlan @molbap
/src/transformers/models/nat/mod*_nat* @yonigozlan @molbap
/src/transformers/models/poolformer/mod*_poolformer* @yonigozlan @molbap
/src/transformers/models/pvt/mod*_pvt* @yonigozlan @molbap
/src/transformers/models/pvt_v2/mod*_pvt_v2* @yonigozlan @molbap
/src/transformers/models/regnet/mod*_regnet* @yonigozlan @molbap
/src/transformers/models/resnet/mod*_resnet* @yonigozlan @molbap
/src/transformers/models/rt_detr/mod*_rt_detr* @yonigozlan @molbap
/src/transformers/models/segformer/mod*_segformer* @yonigozlan @molbap
/src/transformers/models/seggpt/mod*_seggpt* @yonigozlan @molbap
/src/transformers/models/superpoint/mod*_superpoint* @yonigozlan @molbap
/src/transformers/models/swiftformer/mod*_swiftformer* @yonigozlan @molbap
/src/transformers/models/swin/mod*_swin* @yonigozlan @molbap
/src/transformers/models/swinv2/mod*_swinv2* @yonigozlan @molbap
/src/transformers/models/swin2sr/mod*_swin2sr* @yonigozlan @molbap
/src/transformers/models/table_transformer/mod*_table_transformer* @yonigozlan @molbap
/src/transformers/models/textnet/mod*_textnet* @yonigozlan @molbap
/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @yonigozlan @molbap
/src/transformers/models/upernet/mod*_upernet* @yonigozlan @molbap
/src/transformers/models/van/mod*_van* @yonigozlan @molbap
/src/transformers/models/vit/mod*_vit* @yonigozlan @molbap
/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @yonigozlan @molbap
/src/transformers/models/vitdet/mod*_vitdet* @yonigozlan @molbap
/src/transformers/models/vit_mae/mod*_vit_mae* @yonigozlan @molbap
/src/transformers/models/vitmatte/mod*_vitmatte* @yonigozlan @molbap
/src/transformers/models/vit_msn/mod*_vit_msn* @yonigozlan @molbap
/src/transformers/models/vitpose/mod*_vitpose* @yonigozlan @molbap
/src/transformers/models/yolos/mod*_yolos* @yonigozlan @molbap
/src/transformers/models/zoedepth/mod*_zoedepth* @yonigozlan @molbap
# Audio models
/src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb
@ -304,7 +304,7 @@ trainer_utils.py @zach-huggingface @SunMarc
/src/transformers/models/donut/mod*_donut* @zucchini-nlp
/src/transformers/models/flava/mod*_flava* @zucchini-nlp
/src/transformers/models/git/mod*_git* @zucchini-nlp
/src/transformers/models/grounding_dino/mod*_grounding_dino* @qubvel
/src/transformers/models/grounding_dino/mod*_grounding_dino* @yonigozlan
/src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp
/src/transformers/models/idefics/mod*_idefics* @zucchini-nlp
/src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp
@ -326,10 +326,10 @@ trainer_utils.py @zach-huggingface @SunMarc
/src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp
/src/transformers/models/mllama/mod*_mllama* @zucchini-nlp
/src/transformers/models/nougat/mod*_nougat* @NielsRogge
/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @qubvel @yonigozlan
/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @yonigozlan
/src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp
/src/transformers/models/owlvit/mod*_owlvit* @qubvel
/src/transformers/models/owlv2/mod*_owlv2* @qubvel
/src/transformers/models/owlvit/mod*_owlvit* @yonigozlan
/src/transformers/models/owlv2/mod*_owlv2* @yonigozlan
/src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap
/src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp
/src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp

View File

@ -1,10 +1,7 @@
name: Self-hosted runner (benchmark)
on:
push:
branches: [main]
pull_request:
types: [ opened, labeled, reopened, synchronize ]
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

57
.github/workflows/benchmark_v2.yml vendored Normal file
View File

@ -0,0 +1,57 @@
name: Benchmark v2 Framework
on:
workflow_dispatch:
env:
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
jobs:
benchmark-v2:
name: Benchmark v2
runs-on: ${{ inputs.runner }}
if: |
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) ||
(github.event_name == 'schedule')
container:
image: ${{ inputs.container_image }}
options: ${{ inputs.container_options }}
steps:
- name: Get repo
uses: actions/checkout@v4
with:
ref: ${{ inputs.commit_sha || github.sha }}
- name: Install benchmark dependencies
run: |
python3 -m pip install -r benchmark_v2/requirements.txt
- name: Reinstall transformers in edit mode
run: |
python3 -m pip uninstall -y transformers
python3 -m pip install -e ".[torch]"
- name: Show installed libraries and their versions
run: |
python3 -m pip list
python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')"
python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true
nvidia-smi || true
- name: Run benchmark v2
working-directory: benchmark_v2
run: |
echo "Running benchmarks"
python3 run_benchmarks.py \
--commit-id '${{ inputs.commit_sha || github.sha }}' \
--run-id '${{ inputs.run_id }}' \
--push-to-hub '${{ inputs.benchmark_repo_id}}' \
--token '${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}' \
--log-level INFO
env:
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

View File

@ -0,0 +1,17 @@
name: Benchmark v2 Scheduled Runner - A10 Single-GPU
on:
workflow_dispatch:
jobs:
benchmark-v2-default:
name: Benchmark v2 - Default Models
uses: ./.github/workflows/benchmark_v2.yml
with:
runner: aws-g5-4xlarge-cache-use1-public-80
container_image: huggingface/transformers-pytorch-gpu
container_options: --gpus all --privileged --ipc host --shm-size "16gb"
commit_sha: ${{ github.sha }}
run_id: ${{ github.run_id }}
benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
secrets: inherit

View File

@ -0,0 +1,17 @@
name: Benchmark v2 Scheduled Runner - MI325 Single-GPU
on:
workflow_dispatch:
jobs:
benchmark-v2-default:
name: Benchmark v2 - Default Models
uses: ./.github/workflows/benchmark_v2.yml
with:
runner: amd-mi325-ci-1gpu
container_image: huggingface/transformers-pytorch-amd-gpu
container_options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache
commit_sha: ${{ github.sha }}
run_id: ${{ github.run_id }}
benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
secrets: inherit

View File

@ -26,7 +26,7 @@ jobs:
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "jax-light", "examples-torch", "examples-tf"]
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "exotic-models", "examples-torch"]
continue-on-error: true
steps:

View File

@ -5,6 +5,7 @@ on:
branches:
- build_ci_docker_image*
repository_dispatch:
workflow_dispatch:
workflow_call:
inputs:
image_postfix:
@ -221,7 +222,7 @@ jobs:
latest-pytorch-amd:
name: "Latest PyTorch (AMD) [dev]"
runs-on:
group: aws-general-8-plus
group: aws-highcpu-32-priv
steps:
-
name: Set up Docker Buildx

View File

@ -2,6 +2,10 @@ name: Build docker images (Nightly CI)
on:
workflow_call:
inputs:
job:
required: true
type: string
push:
branches:
- build_nightly_ci_docker_image*
@ -12,7 +16,8 @@ concurrency:
jobs:
latest-with-torch-nightly-docker:
name: "Nightly PyTorch + Stable TensorFlow"
name: "Nightly PyTorch"
if: inputs.job == 'latest-with-torch-nightly-docker' || inputs.job == ''
runs-on:
group: aws-general-8-plus
steps:
@ -41,6 +46,7 @@ jobs:
nightly-torch-deepspeed-docker:
name: "Nightly PyTorch + DeepSpeed"
if: inputs.job == 'nightly-torch-deepspeed-docker' || inputs.job == ''
runs-on:
group: aws-g4dn-2xlarge-cache
steps:

View File

@ -16,8 +16,20 @@ jobs:
commit_sha: ${{ github.sha }}
package: transformers
notebook_folder: transformers_doc
languages: ar de en es fr hi it ko pt tr zh ja te
languages: en
custom_container: huggingface/transformers-doc-builder
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
build_other_lang:
uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
with:
commit_sha: ${{ github.sha }}
package: transformers
notebook_folder: transformers_doc
languages: ar de es fr hi it ja ko pt zh
custom_container: huggingface/transformers-doc-builder
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}

View File

@ -35,16 +35,20 @@ env:
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
jobs:
check_new_failures:
name: " "
name: "Find commits for new failing tests"
strategy:
matrix:
run_idx: [1]
runs-on:
group: aws-g5-4xlarge-cache
outputs:
process: ${{ steps.check_file.outputs.process }}
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -55,14 +59,17 @@ jobs:
path: /transformers/ci_results_${{ inputs.job }}
- name: Check file
id: check_file
working-directory: /transformers
run: |
if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
echo "process=true" >> $GITHUB_ENV
echo "process=true" >> $GITHUB_OUTPUT
else
echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
echo "process=false" >> $GITHUB_ENV
echo "process=false" >> $GITHUB_OUTPUT
fi
- uses: actions/download-artifact@v4
@ -119,6 +126,10 @@ jobs:
run: |
python3 utils/print_env.py
- name: Install pytest-flakefinder
if: ${{ env.process == 'true' }}
run: python3 -m pip install pytest-flakefinder
- name: Show installed libraries and their versions
working-directory: /transformers
if: ${{ env.process == 'true' }}
@ -127,25 +138,63 @@ jobs:
- name: Check failed tests
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
- name: Show results
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
ls -l new_failures_with_bad_commit.json
cat new_failures_with_bad_commit.json
ls -l new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
cat new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
- name: Checkout back
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}
path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}_${{ matrix.run_idx }}.json
process_new_failures_with_commit_info:
name: "process bad commit reports"
needs: check_new_failures
if: needs.check_new_failures.outputs.process == 'true'
runs-on:
group: aws-g5-4xlarge-cache
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- uses: actions/download-artifact@v4
with:
name: ci_results_${{ inputs.job }}
path: /transformers/ci_results_${{ inputs.job }}
- uses: actions/download-artifact@v4
with:
pattern: new_failures_with_bad_commit_${{ inputs.job }}*
path: /transformers/new_failures_with_bad_commit_${{ inputs.job }}
merge-multiple: true
- name: Check files
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
git checkout ${{ inputs.start_sha }}
ls -la /transformers
ls -la /transformers/new_failures_with_bad_commit_${{ inputs.job }}
# Currently, we only run with a single runner by using `run_idx: [1]`. We might try to run with multiple runners
# to further reduce the false positives caused by flaky tests, which requires further processing to merge reports.
- name: Merge files
shell: bash
working-directory: /transformers
run: |
cp /transformers/new_failures_with_bad_commit_${{ inputs.job }}/new_failures_with_bad_commit_${{ inputs.job }}_1.json new_failures_with_bad_commit.json
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
- name: Process report
shell: bash
working-directory: /transformers
if: ${{ env.process == 'true' }}
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@ -157,7 +206,6 @@ jobs:
- name: Process report
shell: bash
working-directory: /transformers
if: ${{ env.process == 'true' }}
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@ -172,13 +220,12 @@ jobs:
- name: Prepare Slack report title
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: |
pip install slack_sdk
echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
- name: Send processed report
if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
with:
# Slack channel id, channel name, or user id to post message.

View File

@ -41,9 +41,3 @@ jobs:
--job ${{ inputs.job }} \
--report-repo-id ${{ inputs.report_repo_id }} \
--gpu-name ${{ inputs.gpu_name }}
- name: Upload collated reports
uses: actions/upload-artifact@v4
with:
name: collated_reports_${{ env.CI_SHA }}.json
path: collated_reports_${{ env.CI_SHA }}.json

View File

@ -16,7 +16,6 @@ env:
RUN_SLOW: yes
OMP_NUM_THREADS: 16
MKL_NUM_THREADS: 16
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
jobs:

View File

@ -12,9 +12,6 @@ on:
slice_id:
required: true
type: number
runner_map:
required: false
type: string
docker:
required: true
type: string
@ -25,6 +22,12 @@ on:
required: false
default: run_models_gpu
type: string
runner_type:
required: false
type: string
report_repo_id:
required: false
type: string
env:
HF_HOME: /mnt/cache
@ -35,7 +38,6 @@ env:
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
@ -48,10 +50,12 @@ jobs:
matrix:
folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
runs-on:
group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
group: '${{ inputs.machine_type }}'
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
machine_type: ${{ steps.set_machine_type.outputs.machine_type }}
steps:
- name: Echo input and matrix info
shell: bash
@ -105,6 +109,7 @@ jobs:
run: pip freeze
- name: Set `machine_type` for report and artifact names
id: set_machine_type
working-directory: /transformers
shell: bash
run: |
@ -120,26 +125,58 @@ jobs:
echo "$machine_type"
echo "machine_type=$machine_type" >> $GITHUB_ENV
echo "machine_type=$machine_type" >> $GITHUB_OUTPUT
- name: Create report directory if it doesn't exist
shell: bash
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
echo "dummy" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/dummy.txt
ls -la /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
- name: Run all tests on GPU
working-directory: /transformers
run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
run: |
script -q -c "PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS=yes _PATCHED_TESTING_METHODS_OUTPUT_DIR=/transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports tests/${{ matrix.folders }}" test_outputs.txt
ls -la
# Extract the exit code from the output file
EXIT_CODE=$(tail -1 test_outputs.txt | grep -o 'COMMAND_EXIT_CODE="[0-9]*"' | cut -d'"' -f2)
exit ${EXIT_CODE:-1}
- name: Failure short reports
if: ${{ failure() }}
# This step only shows information in the GitHub Actions log.
# Always mark this step as successful, even if the report directory or the file `failures_short.txt` in it doesn't exist
continue-on-error: true
run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt
run: cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/failures_short.txt
- name: Run test
shell: bash
- name: Captured information
if: ${{ failure() }}
continue-on-error: true
run: |
mkdir -p /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
echo "hello" > /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"
cat /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports/captured_info.txt
- name: Copy test_outputs.txt
if: ${{ always() }}
continue-on-error: true
run: |
cp /transformers/test_outputs.txt /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
- name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
path: /transformers/reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
collated_reports:
name: Collated Reports
if: ${{ always() }}
needs: run_models_gpu
uses: huggingface/transformers/.github/workflows/collated-reports.yml@main
with:
job: run_models_gpu
report_repo_id: ${{ inputs.report_repo_id }}
gpu_name: ${{ inputs.runner_type }}
machine_type: ${{ needs.run_models_gpu.outputs.machine_type }}
secrets: inherit

View File

@ -26,7 +26,6 @@ env:
TRANSFORMERS_IS_CI: yes
PT_ENABLE_INT64_SUPPORT: 1
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
HF_HOME: /mnt/cache/.cache/huggingface
jobs:

View File

@ -14,7 +14,7 @@ permissions: {}
jobs:
get-pr-number:
name: Get PR number
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu", "ebezzam"]'), github.actor) && (startsWith(github.event.comment.body, 'build-doc')) }}
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'build-doc')) }}
uses: ./.github/workflows/get-pr-number.yml
get-pr-info:
@ -98,7 +98,7 @@ jobs:
commit_sha: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}
pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
package: transformers
languages: ar de en es fr hi it ko pt tr zh ja te
languages: ar de en es fr hi it ja ko pt zh
update_run_status:
name: Update Check Run Status

View File

@ -4,17 +4,6 @@ on:
push:
branches: [ main ]
env:
OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
HF_HOME: /mnt/cache
TRANSFORMERS_IS_CI: yes
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
jobs:
get_modified_models:
name: "Get all modified files"
@ -25,111 +14,144 @@ jobs:
- name: Check out code
uses: actions/checkout@v4
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
- name: Get changed files using `actions/github-script`
id: get-changed-files
uses: actions/github-script@v7
with:
files: src/transformers/models/**
script: |
let files = [];
// Only handle push events
if (context.eventName === 'push') {
const afterSha = context.payload.after;
const branchName = context.payload.ref.replace('refs/heads/', '');
let baseSha;
if (branchName === 'main') {
console.log('Push to main branch, comparing to parent commit');
// Get the parent commit of the pushed commit
const { data: commit } = await github.rest.repos.getCommit({
owner: context.repo.owner,
repo: context.repo.repo,
ref: afterSha
});
baseSha = commit.parents[0]?.sha;
if (!baseSha) {
throw new Error('No parent commit found for the pushed commit');
}
} else {
console.log(`Push to branch ${branchName}, comparing to main`);
baseSha = 'main';
}
const { data: comparison } = await github.rest.repos.compareCommits({
owner: context.repo.owner,
repo: context.repo.repo,
base: baseSha,
head: afterSha
});
// Include added, modified, and renamed files
files = comparison.files
.filter(file => file.status === 'added' || file.status === 'modified' || file.status === 'renamed')
.map(file => file.filename);
}
// Include all files under src/transformers/ (not just models subdirectory)
const filteredFiles = files.filter(file =>
file.startsWith('src/transformers/')
);
core.setOutput('changed_files', filteredFiles.join(' '));
core.setOutput('any_changed', filteredFiles.length > 0 ? 'true' : 'false');
- name: Run step if only the files listed above change
if: steps.changed-files.outputs.any_changed == 'true'
id: set-matrix
- name: Parse changed files with Python
if: steps.get-changed-files.outputs.any_changed == 'true'
env:
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
CHANGED_FILES: ${{ steps.get-changed-files.outputs.changed_files }}
id: set-matrix
run: |
model_arrays=()
for file in $ALL_CHANGED_FILES; do
model_path="${file#*models/}"
model_path="models/${model_path%%/*}"
if grep -qFx "$model_path" utils/important_models.txt; then
# Append the file to the matrix string
model_arrays+=("$model_path")
fi
done
matrix_string=$(printf '"%s", ' "${model_arrays[@]}" | sed 's/, $//')
echo "matrix=[$matrix_string]" >> $GITHUB_OUTPUT
test_modified_files:
python3 - << 'EOF'
import os
import sys
import json
# Add the utils directory to Python path
sys.path.insert(0, 'utils')
# Import the important models list
from important_files import IMPORTANT_MODELS
print(f"Important models: {IMPORTANT_MODELS}")
# Get the changed files from the previous step
changed_files_str = os.environ.get('CHANGED_FILES', '')
changed_files = changed_files_str.split() if changed_files_str else []
# Filter to only Python files
python_files = [f for f in changed_files if f.endswith('.py')]
print(f"Python files changed: {python_files}")
result_models = set()
# Specific files that trigger all models
transformers_utils_files = [
'modeling_utils.py',
'modeling_rope_utils.py',
'modeling_flash_attention_utils.py',
'modeling_attn_mask_utils.py',
'cache_utils.py',
'masking_utils.py',
'pytorch_utils.py'
]
# Single loop through all Python files
for file in python_files:
# Check for files under src/transformers/models/
if file.startswith('src/transformers/models/'):
remaining_path = file[len('src/transformers/models/'):]
if '/' in remaining_path:
model_dir = remaining_path.split('/')[0]
if model_dir in IMPORTANT_MODELS:
result_models.add(model_dir)
print(f"Added model directory: {model_dir}")
# Check for specific core files under src/transformers/ or any file under src/transformers/generation/
elif file.startswith('src/transformers/generation/') or \
(file.startswith('src/transformers/') and os.path.basename(file) in transformers_utils_files):
print(f"Found core file: {file} - including all important models")
result_models.update(IMPORTANT_MODELS)
break # No need to continue once we include all models
# Convert to sorted list and create matrix
result_list = sorted(list(result_models))
print(f"Final model list: {result_list}")
if result_list:
matrix_json = json.dumps(result_list)
print(f"matrix={matrix_json}")
# Write to GITHUB_OUTPUT
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
f.write(f"matrix={matrix_json}\n")
else:
print("matrix=[]")
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
f.write("matrix=[]\n")
EOF
model-ci:
name: Model CI
uses: ./.github/workflows/self-scheduled.yml
needs: get_modified_models
name: Slow & FA2 tests
runs-on:
group: aws-g5-4xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
strategy:
fail-fast: false
matrix:
model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Install locally transformers & other libs
run: |
apt install sudo
sudo -H pip install --upgrade pip
sudo -H pip uninstall -y transformers
sudo -H pip install -U -e ".[testing]"
MAX_JOBS=4 pip install flash-attn --no-build-isolation
pip install bitsandbytes
- name: NVIDIA-SMI
run: |
nvidia-smi
- name: Show installed libraries and their versions
run: pip freeze
- name: Run FA2 tests
id: run_fa2_tests
run:
pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.model-name }}_fa2_tests
path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
status: ${{ steps.run_fa2_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
- name: Run integration tests
id: run_integration_tests
if: always()
run:
pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
- name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: tests_integration_${{ matrix.model-name }}
path: /transformers/reports/tests_integration_${{ matrix.model-name }}
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
status: ${{ steps.run_integration_tests.conclusion}}
slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
- name: Tailscale # In order to be able to SSH when a test fails
if: ${{ runner.debug == '1'}}
uses: huggingface/tailscale-action@v1
with:
authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
waitForSSH: true
if: needs.get_modified_models.outputs.matrix != '' && needs.get_modified_models.outputs.matrix != '[]'
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-push"
docker: huggingface/transformers-all-latest-gpu
ci_event: push
report_repo_id: hf-internal-testing/transformers_ci_push
commit_sha: ${{ github.sha }}
models: ${{ needs.get_modified_models.outputs.matrix }}
secrets: inherit

View File

@ -20,7 +20,6 @@ env:
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
@ -29,7 +28,7 @@ jobs:
runs-on: ubuntu-22.04
name: Get PR number
# For security: only allow team members to run
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu", "ebezzam"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps:

View File

@ -12,12 +12,36 @@ on:
branches:
- run_ci_with_nightly_torch*
# Used for `push` to easily modify the target workflow runs to compare against
env:
prev_workflow_run_id: ""
other_workflow_run_id: ""
jobs:
build_nightly_torch_ci_images:
name: Build CI Docker Images with nightly torch
uses: ./.github/workflows/build-nightly-ci-docker-images.yml
with:
job: latest-with-torch-nightly-docker
secrets: inherit
setup:
name: Setup
runs-on: ubuntu-22.04
steps:
- name: Setup
run: |
mkdir "setup_values"
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: setup_values
path: setup_values
model-ci:
name: Model CI
needs: build_nightly_torch_ci_images

View File

@ -20,7 +20,7 @@ jobs:
with:
job: run_models_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi325-ci
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
@ -33,7 +33,7 @@ jobs:
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi325-ci
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
@ -46,7 +46,7 @@ jobs:
with:
job: run_examples_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi325-ci
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci
@ -59,7 +59,7 @@ jobs:
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi325-ci
runner_group: amd-mi325
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
ci_event: Scheduled CI (AMD) - mi325
report_repo_id: optimum-amd/transformers_daily_ci

View File

@ -3,7 +3,7 @@ name: Self-hosted runner scale set (AMD mi355 scheduled CI caller)
# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
# For example, 1gpu : amd-mi355-ci-1gpu
# 2gpu : amd-mi355-ci-2gpu
on:
workflow_run:
workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
@ -20,10 +20,10 @@ jobs:
with:
job: run_models_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi355-ci
docker: huggingface/transformers-pytorch-amd-gpu
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: optimum-amd/transformers_daily_ci
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit
torch-pipeline:
@ -32,10 +32,10 @@ jobs:
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi355-ci
docker: huggingface/transformers-pytorch-amd-gpu
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: optimum-amd/transformers_daily_ci
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit
example-ci:
@ -44,20 +44,20 @@ jobs:
with:
job: run_examples_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi355-ci
docker: huggingface/transformers-pytorch-amd-gpu
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: optimum-amd/transformers_daily_ci
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
with:
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#amd-hf-ci"
runner_scale_set: amd-mi355-ci
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
runner_group: hfc-amd-mi355
docker: huggingface/testing-rocm7.0-preview
ci_event: Scheduled CI (AMD) - mi355
report_repo_id: optimum-amd/transformers_daily_ci
report_repo_id: hf-transformers-bot/transformers-ci-dummy
secrets: inherit

View File

@ -6,7 +6,7 @@ on:
- cron: "17 2 * * *"
push:
branches:
- run_nvidia_ci*
- multi_jobs_to_check_bad_commit
workflow_dispatch:
inputs:
prev_workflow_run_id:
@ -23,7 +23,7 @@ on:
# Used for `push` to easily modify the target workflow runs to compare against
env:
prev_workflow_run_id: ""
prev_workflow_run_id: "18548615847"
other_workflow_run_id: ""
@ -49,70 +49,10 @@ jobs:
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_models_gpu
slack_report_channel: "#transformers-ci-daily-models"
slack_report_channel: "#transformers-ci-dummy"
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
torch-pipeline:
name: Torch pipeline CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_pipelines_torch_gpu
slack_report_channel: "#transformers-ci-daily-pipeline-torch"
docker: huggingface/transformers-pytorch-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_examples_gpu
slack_report_channel: "#transformers-ci-daily-examples"
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
trainer-fsdp-ci:
name: Trainer/FSDP CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_trainer_and_fsdp_gpu
slack_report_channel: "#transformers-ci-daily-training"
docker: huggingface/transformers-all-latest-gpu
ci_event: Daily CI
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
deepspeed-ci:
name: DeepSpeed CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_torch_cuda_extensions_gpu
slack_report_channel: "#transformers-ci-daily-training"
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
ci_event: Daily CI
working-directory-prefix: /workspace
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit
quantization-ci:
name: Quantization CI
uses: ./.github/workflows/self-scheduled.yml
with:
job: run_quantization_torch_gpu
slack_report_channel: "#transformers-ci-daily-quantization"
docker: huggingface/transformers-quantization-latest-gpu
ci_event: Daily CI
runner_type: "a10"
report_repo_id: hf-internal-testing/transformers_daily_ci
commit_sha: ${{ github.sha }}
secrets: inherit

View File

@ -26,7 +26,6 @@ env:
TRANSFORMERS_IS_CI: yes
PT_ENABLE_INT64_SUPPORT: 1
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
HF_HOME: /mnt/cache/.cache/huggingface
jobs:

View File

@ -31,7 +31,13 @@ on:
commit_sha:
required: false
type: string
runner_type:
required: false
type: string
models:
default: ""
required: false
type: string
env:
HF_HOME: /mnt/cache
@ -42,7 +48,6 @@ env:
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
NUM_SLICES: 2
@ -62,13 +67,12 @@ jobs:
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
runner_map: ${{ steps.set-matrix.outputs.runner_map }}
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
steps:
- name: Update clone
working-directory: /transformers
run: |
git fetch && git checkout ${{ github.sha }}
git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
- name: Cleanup
working-directory: /transformers
@ -87,9 +91,8 @@ jobs:
working-directory: /transformers/tests
run: |
if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "folder_slices=$(python3 ../utils/split_model_tests.py --models '${{ inputs.models }}' --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
@ -113,16 +116,17 @@ jobs:
strategy:
fail-fast: false
matrix:
machine_type: [single-gpu, multi-gpu]
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
uses: ./.github/workflows/model_jobs.yml
with:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner_map: ${{ needs.setup.outputs.runner_map }}
docker: ${{ inputs.docker }}
commit_sha: ${{ inputs.commit_sha || github.sha }}
runner_type: ${{ inputs.runner_type }}
report_repo_id: ${{ inputs.report_repo_id }}
secrets: inherit
run_trainer_and_fsdp_gpu:
@ -139,9 +143,10 @@ jobs:
folder_slices: ${{ needs.setup.outputs.folder_slices }}
machine_type: ${{ matrix.machine_type }}
slice_id: ${{ matrix.slice_id }}
runner_map: ${{ needs.setup.outputs.runner_map }}
docker: ${{ inputs.docker }}
commit_sha: ${{ inputs.commit_sha || github.sha }}
runner_type: ${{ inputs.runner_type }}
report_repo_id: ${{ inputs.report_repo_id }}
report_name_prefix: run_trainer_and_fsdp_gpu
secrets: inherit
@ -512,7 +517,7 @@ jobs:
run_quantization_torch_gpu,
run_extract_warnings
]
if: ${{ always() }}
if: always() && !cancelled()
uses: ./.github/workflows/slack-report.yml
with:
job: ${{ inputs.job }}

View File

@ -36,7 +36,7 @@ jobs:
send_results:
name: Send results to webhook
runs-on: ubuntu-22.04
if: always()
if: always() && !cancelled()
steps:
- name: Preliminary job status
shell: bash
@ -75,6 +75,8 @@ jobs:
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: ${{ inputs.ci_event }}
# This `CI_TITLE` would be empty for `schedule` or `workflow_run` events.
CI_TITLE: ${{ github.event.head_commit.message }}
CI_SHA: ${{ inputs.commit_sha || github.sha }}
CI_TEST_JOB: ${{ inputs.job }}
SETUP_STATUS: ${{ inputs.setup_status }}
@ -91,7 +93,7 @@ jobs:
python utils/notification_service.py "${{ inputs.quantization_matrix }}"
else
python utils/notification_service.py "${{ inputs.folder_slices }}"
fi
fi
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts

View File

@ -20,7 +20,6 @@ env:
OMP_NUM_THREADS: 8
MKL_NUM_THREADS: 8
RUN_SLOW: yes
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
# This token is created under the bot `hf-transformers-bot`.
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
TF_FORCE_GPU_ALLOW_GROWTH: true
CUDA_VISIBLE_DEVICES: 0,1
@ -33,14 +32,17 @@ jobs:
steps:
- name: Get runner to use
shell: bash
env:
NUM_GPUS: ${{ github.event.inputs.num_gpus }}
RUNNER_TYPE: ${{ github.event.inputs.runner_type }}
run: |
if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
if [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "t4" ]]; then
echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "t4" ]]; then
echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
elif [[ "$NUM_GPUS" == "single" && "$RUNNER_TYPE" == "a10" ]]; then
echo "RUNNER=aws-g5-4xlarge-cache" >> $GITHUB_ENV
elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
elif [[ "$NUM_GPUS" == "multi" && "$RUNNER_TYPE" == "a10" ]]; then
echo "RUNNER=aws-g5-12xlarge-cache" >> $GITHUB_ENV
else
echo "RUNNER=" >> $GITHUB_ENV
@ -85,9 +87,11 @@ jobs:
- name: Store Slack infos
# Because SSH can be enabled dynamically if the workflow fails, we store the Slack info here so it can be retrieved during the waitforssh step
shell: bash
env:
GITHUB_ACTOR: ${{ github.actor }}
run: |
echo "${{ github.actor }}"
github_actor=${{ github.actor }}
echo "$GITHUB_ACTOR"
github_actor=$GITHUB_ACTOR
github_actor=${github_actor/'-'/'_'}
echo "$github_actor"
echo "github_actor=$github_actor" >> $GITHUB_ENV

.gitignore
View File

@ -13,6 +13,7 @@ tests/fixtures/cached_*_text.txt
logs/
lightning_logs/
lang_code_data/
reports/
# Distribution / packaging
.Python
@ -97,6 +98,7 @@ celerybeat-schedule
# Environments
.env
.venv
.venv*
env/
venv/
ENV/
@ -170,3 +172,6 @@ tags
# modular conversion
*.modular_backup
# Cursor IDE files
.cursor/

View File

@ -278,13 +278,14 @@ are working on it).<br>
useful to avoid duplicated work, and to differentiate it from PRs ready to be merged.<br>
☐ Make sure existing tests pass.<br>
☐ If adding a new feature, also add tests for it.<br>
- If you are adding a new model, make sure you use
`ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)` to trigger the common tests.
- If you are adding new `@slow` tests, make sure they pass using
`RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`.
- If you are adding a new tokenizer, write tests and make sure
`RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py` passes.
- CircleCI does not run the slow tests, but GitHub Actions does every night!<br>
☐ All public methods must have informative docstrings (see
[`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py)
@ -340,6 +341,7 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t
```
Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).
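For instance (the test file below is only an illustration), these flags are enabled the same way as `RUN_SLOW`:
```bash
# Illustrative: enable the custom-tokenizer tests for a single test file
RUN_CUSTOM_TOKENIZERS=yes python -m pytest -sv tests/models/bert_japanese/test_tokenization_bert_japanese.py
```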

View File

@ -38,7 +38,6 @@ In particular all "Please explain" questions or objectively very user-specific f
* "How to train T5 on De->En translation?"
## The GitHub Issues
Everything which hints at a bug should be opened as an [issue](https://github.com/huggingface/transformers/issues).
@ -154,7 +153,7 @@ You are not required to read the following guidelines before opening an issue. H
cd examples/seq2seq
torchrun --nproc_per_node=2 ./finetune_trainer.py \
--model_name_or_path sshleifer/distill-mbart-en-ro-12-4 --data_dir wmt_en_ro \
--output_dir output_dir --overwrite_output_dir \
--output_dir output_dir \
--do_train --n_train 500 --num_train_epochs 1 \
--per_device_train_batch_size 1 --freeze_embeds \
--src_lang en_XX --tgt_lang ro_RO --task translation \
@ -247,7 +246,6 @@ You are not required to read the following guidelines before opening an issue. H
Try not to use italics and bold text too much, as these often make the text more difficult to read.
12. If you are cross-referencing a specific comment in a given thread or another issue, always link to that specific comment, rather than using the issue link. If you do the latter it could be quite impossible to find which specific comment you're referring to.
To get the link to the specific comment do not copy the url from the location bar of your browser, but instead, click the `...` icon in the upper right corner of the comment and then select "Copy Link".
@ -257,7 +255,6 @@ You are not required to read the following guidelines before opening an issue. H
1. https://github.com/huggingface/transformers/issues/9257
2. https://github.com/huggingface/transformers/issues/9257#issuecomment-749945162
13. If you are replying to the last comment, it's totally fine to make your reply with just your comment in it. The readers can follow the information flow here.
But if you're replying to a comment made several comments back, it's always good practice to quote just the relevant lines you're replying to. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like:

View File

@ -3,7 +3,7 @@
# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = src
check_dirs := examples tests src utils
check_dirs := examples tests src utils scripts benchmark benchmark_v2
exclude_folders := ""

View File

@ -48,9 +48,11 @@ limitations under the License.
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_it.md">Italiano</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ur.md">اردو</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_bn.md">বাংলা</a> |
</p>
</h4>
@ -62,12 +64,11 @@ limitations under the License.
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/transformers_as_a_model_definition.png"/>
</h3>
Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer
vision, audio, video, and multimodal models, for both inference and training.
It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the
pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training
frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...),
and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from `transformers`.
@ -80,7 +81,7 @@ Explore the [Hub](https://huggingface.com/) today to find a model and use Transf
## Installation
Transformers works with Python 3.9+ [PyTorch](https://pytorch.org/get-started/locally/) 2.1+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+.
Transformers works with Python 3.9+, and [PyTorch](https://pytorch.org/get-started/locally/) 2.1+.
Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager.
@ -110,10 +111,10 @@ git clone https://github.com/huggingface/transformers.git
cd transformers
# pip
pip install .[torch]
pip install '.[torch]'
# uv
uv pip install .[torch]
uv pip install '.[torch]'
```
## Quickstart
@ -193,7 +194,6 @@ pipeline("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.pn
<details>
<summary>Visual question answering</summary>
<h3 align="center">
<a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg"></a>
</h3>

View File

@ -6,7 +6,7 @@ developers, researchers, students, professors, engineers, and anyone else to bui
In this list, we showcase incredibly impactful and novel projects that have pushed the field forward. We celebrate
100 of these projects as we reach the milestone of 100k stars as a community; but we're very open to pull requests
adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR
to add it.
## [gpt4all](https://github.com/nomic-ai/gpt4all)
@ -49,7 +49,7 @@ Keywords: LLMs, Large Language Models, Agents, Chains
[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation
## [ParlAI](https://github.com/facebookresearch/ParlAI)
@ -257,7 +257,7 @@ Stable-Dreamfusion is a pytorch implementation of the text-to-3D model Dreamfusi
Keywords: Text-to-3D, Stable Diffusion
## [txtai](https://github.com/neuml/txtai)
[txtai](https://github.com/neuml/txtai) is an open-source platform for semantic search and workflows powered by language models. txtai builds embeddings databases, which are a union of vector indexes and relational databases enabling similarity search with SQL. Semantic workflows connect language models together into unified applications.
Keywords: Semantic search, LLM
@ -309,8 +309,8 @@ Keywords: OCR, LaTeX, Math formula
OpenCLIP is an open source implementation of OpenAI's CLIP.
The goal of this repository is to enable training models with contrastive image-text supervision, and to investigate their properties such as robustness to distribution shift.
The starting point is an implementation of CLIP that matches the accuracy of the original CLIP models when trained on the same dataset.
Specifically, a ResNet-50 model trained with this codebase on OpenAI's 15 million image subset of YFCC achieves 32.7% top-1 accuracy on ImageNet.
@ -596,7 +596,7 @@ Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active
## [BentoML](https://github.com/bentoml/BentoML)
[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models.
All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage.
Keywords: BentoML, Framework, Deployment, AI Applications
@ -606,4 +606,3 @@ Keywords: BentoML, Framework, Deployment, AI Applications
[LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory) offers a user-friendly fine-tuning framework that incorporates PEFT. The repository includes training (fine-tuning) and inference examples for LLaMA-2, BLOOM, Falcon, Baichuan, Qwen, and other LLMs. A ChatGLM version is also available in [ChatGLM-Efficient-Tuning](https://github.com/hiyouga/ChatGLM-Efficient-Tuning).
Keywords: PEFT, fine-tuning, LLaMA-2, ChatGLM, Qwen

View File

@ -11,25 +11,27 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logging import Logger
import os
import sys
from logging import Logger
from threading import Event, Thread
from time import perf_counter, sleep
from typing import Optional
import sys
# Add the parent directory to Python path to import benchmarks_entrypoint
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from benchmarks_entrypoint import MetricsRecorder
import gpustat
import psutil
import psycopg2
from benchmarks_entrypoint import MetricsRecorder
# Optional heavy ML dependencies - only required when actually running the benchmark
try:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
TRANSFORMERS_AVAILABLE = True
except ImportError:
TRANSFORMERS_AVAILABLE = False
@ -39,7 +41,7 @@ except ImportError:
GenerationConfig = None
StaticCache = None
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["HF_XET_HIGH_PERFORMANCE"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "1"
# Only set torch precision if torch is available
@ -63,7 +65,13 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
def run_benchmark(
logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, metrics_recorder=None, num_tokens_to_generate=100
logger: Logger,
repository: str,
branch: str,
commit_id: str,
commit_msg: str,
metrics_recorder=None,
num_tokens_to_generate=100,
):
# Check if required ML dependencies are available
if not TRANSFORMERS_AVAILABLE:
@ -71,11 +79,11 @@ def run_benchmark(
logger.error("pip install torch transformers")
logger.error("Skipping LLaMA benchmark due to missing dependencies.")
return
continue_metric_collection = Event()
metrics_thread = None
model_id = "meta-llama/Llama-2-7b-hf"
# If no metrics_recorder is provided, create one for backward compatibility
if metrics_recorder is None:
try:
@ -136,7 +144,7 @@ def run_benchmark(
q = torch.empty_like(probs_sort).exponential_(1)
return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
def logits_to_probs(logits, temperature: float = 1.0, top_k: int | None = None):
logits = logits / max(temperature, 1e-5)
if top_k is not None:
@ -146,7 +154,7 @@ def run_benchmark(
probs = torch.nn.functional.softmax(logits, dim=-1)
return probs
def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
def sample(logits, temperature: float = 1.0, top_k: int | None = None):
probs = logits_to_probs(logits[0, -1], temperature, top_k)
idx_next = multinomial_sample_one_no_sync(probs)
return idx_next, probs
@ -154,7 +162,7 @@ def run_benchmark(
# First eager forward pass
logger.info("running first eager forward pass")
start = perf_counter()
outputs = model(**inputs)
_ = model(**inputs)
torch.cuda.synchronize()
end = perf_counter()
first_eager_fwd_pass_time = end - start
@ -163,7 +171,7 @@ def run_benchmark(
# Second eager forward pass (should be faster)
logger.info("running second eager forward pass")
start = perf_counter()
outputs = model(**inputs)
_ = model(**inputs)
torch.cuda.synchronize()
end = perf_counter()
second_eager_fwd_pass_time = end - start
@ -339,7 +347,7 @@ def run_benchmark(
continue_metric_collection.set()
if metrics_thread is not None:
metrics_thread.join()
# Only close the recorder if we created it locally
if should_close_recorder:
metrics_recorder.close()
metrics_recorder.close()

View File

@ -31,9 +31,7 @@ from contextlib import contextmanager
from pathlib import Path
from git import Repo
from huggingface_hub import HfApi
from optimum_benchmark import Benchmark
from optimum_benchmark_wrapper import main

View File

@ -13,19 +13,20 @@
# limitations under the License.
import argparse
import importlib.util
import json
import logging
import os
import sys
import json
import uuid
from datetime import datetime
from typing import Dict, Tuple, Optional, List
import pandas as pd
try:
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json
register_adapter(dict, Json)
PSYCOPG2_AVAILABLE = True
except ImportError:
@ -38,8 +39,14 @@ class ImportModuleException(Exception):
class MetricsRecorder:
def __init__(
self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str,
collect_csv_data: bool = True
self,
connection,
logger: logging.Logger,
repository: str,
branch: str,
commit_id: str,
commit_msg: str,
collect_csv_data: bool = True,
):
self.conn = connection
self.use_database = connection is not None
@ -51,27 +58,43 @@ class MetricsRecorder:
self.commit_id = commit_id
self.commit_msg = commit_msg
self.collect_csv_data = collect_csv_data
# For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
if self.collect_csv_data:
# Initialize empty DataFrames with proper schemas
self.benchmarks_df = pd.DataFrame(columns=[
'benchmark_id', 'repository', 'branch', 'commit_id', 'commit_message',
'metadata', 'created_at'
])
self.device_measurements_df = pd.DataFrame(columns=[
'benchmark_id', 'cpu_util', 'mem_megabytes', 'gpu_util',
'gpu_mem_megabytes', 'time'
])
self.model_measurements_df = pd.DataFrame(columns=[
'benchmark_id', 'time', 'model_load_time', 'first_eager_forward_pass_time_secs',
'second_eager_forward_pass_time_secs', 'first_eager_generate_time_secs',
'second_eager_generate_time_secs', 'time_to_first_token_secs',
'time_to_second_token_secs', 'time_to_third_token_secs',
'time_to_next_token_mean_secs', 'first_compile_generate_time_secs',
'second_compile_generate_time_secs', 'third_compile_generate_time_secs',
'fourth_compile_generate_time_secs'
])
self.benchmarks_df = pd.DataFrame(
columns=[
"benchmark_id",
"repository",
"branch",
"commit_id",
"commit_message",
"metadata",
"created_at",
]
)
self.device_measurements_df = pd.DataFrame(
columns=["benchmark_id", "cpu_util", "mem_megabytes", "gpu_util", "gpu_mem_megabytes", "time"]
)
self.model_measurements_df = pd.DataFrame(
columns=[
"benchmark_id",
"time",
"model_load_time",
"first_eager_forward_pass_time_secs",
"second_eager_forward_pass_time_secs",
"first_eager_generate_time_secs",
"second_eager_generate_time_secs",
"time_to_first_token_secs",
"time_to_second_token_secs",
"time_to_third_token_secs",
"time_to_next_token_mean_secs",
"first_compile_generate_time_secs",
"second_compile_generate_time_secs",
"third_compile_generate_time_secs",
"fourth_compile_generate_time_secs",
]
)
else:
self.benchmarks_df = None
self.device_measurements_df = None
@ -83,7 +106,7 @@ class MetricsRecorder:
"""
# Generate a unique UUID for this benchmark
benchmark_id = str(uuid.uuid4())
if self.use_database:
with self.conn.cursor() as cur:
cur.execute(
@ -91,28 +114,32 @@ class MetricsRecorder:
(benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
)
self.logger.debug(f"initialised benchmark #{benchmark_id}")
# Store benchmark data for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame
new_row = pd.DataFrame([{
'benchmark_id': benchmark_id,
'repository': self.repository,
'branch': self.branch,
'commit_id': self.commit_id,
'commit_message': self.commit_msg,
'metadata': json.dumps(metadata),
'created_at': datetime.utcnow().isoformat()
}])
new_row = pd.DataFrame(
[
{
"benchmark_id": benchmark_id,
"repository": self.repository,
"branch": self.branch,
"commit_id": self.commit_id,
"commit_message": self.commit_msg,
"metadata": json.dumps(metadata),
"created_at": datetime.utcnow().isoformat(),
}
]
)
self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
mode_info = []
if self.use_database:
mode_info.append("database")
if self.collect_csv_data:
mode_info.append("CSV")
mode_str = " + ".join(mode_info) if mode_info else "no storage"
self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
return benchmark_id
@ -123,16 +150,20 @@ class MetricsRecorder:
# Store device measurements for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame
new_row = pd.DataFrame([{
'benchmark_id': benchmark_id,
'cpu_util': cpu_util,
'mem_megabytes': mem_megabytes,
'gpu_util': gpu_util,
'gpu_mem_megabytes': gpu_mem_megabytes,
'time': datetime.utcnow().isoformat()
}])
new_row = pd.DataFrame(
[
{
"benchmark_id": benchmark_id,
"cpu_util": cpu_util,
"mem_megabytes": mem_megabytes,
"gpu_util": gpu_util,
"gpu_mem_megabytes": gpu_mem_megabytes,
"time": datetime.utcnow().isoformat(),
}
]
)
self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
# Store in database if available
if self.use_database:
with self.conn.cursor() as cur:
@ -140,7 +171,7 @@ class MetricsRecorder:
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
)
self.logger.debug(
f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
)
@ -149,16 +180,13 @@ class MetricsRecorder:
# Store model measurements for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame with flattened measurements
row_data = {
'benchmark_id': benchmark_id,
'time': datetime.utcnow().isoformat()
}
row_data = {"benchmark_id": benchmark_id, "time": datetime.utcnow().isoformat()}
# Flatten the measurements dict into the row
row_data.update(measurements)
new_row = pd.DataFrame([row_data])
self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
# Store in database if available
if self.use_database:
with self.conn.cursor() as cur:
@ -174,7 +202,7 @@ class MetricsRecorder:
measurements,
),
)
self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
def export_to_csv(self, output_dir: str = "benchmark_results"):
@ -184,19 +212,19 @@ class MetricsRecorder:
if not self.collect_csv_data:
self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
return
if not os.path.exists(output_dir):
os.makedirs(output_dir)
self.logger.info(f"Created output directory: {output_dir}")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
files_created = []
# Export using pandas DataFrames
self._export_pandas_data(output_dir, timestamp, files_created)
self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
"""
Export CSV files using pandas DataFrames
@ -206,24 +234,24 @@ class MetricsRecorder:
self.benchmarks_df.to_csv(benchmarks_file, index=False)
files_created.append(benchmarks_file)
self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
# Export device measurements
device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
self.device_measurements_df.to_csv(device_file, index=False)
files_created.append(device_file)
self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
# Export model measurements (already flattened)
model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
self.model_measurements_df.to_csv(model_file, index=False)
files_created.append(model_file)
self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
# Create comprehensive summary using pandas operations
summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
self._create_summary(summary_file)
files_created.append(summary_file)
def _create_summary(self, summary_file: str):
"""
Create a comprehensive summary CSV using pandas operations
@ -234,36 +262,42 @@ class MetricsRecorder:
summary_df.to_csv(summary_file, index=False)
self.logger.info(f"Created empty benchmark summary at {summary_file}")
return
# Start with benchmarks as the base
summary_df = self.benchmarks_df.copy()
# Add model measurements (join on benchmark_id)
if len(self.model_measurements_df) > 0:
# Drop 'time' column from model measurements to avoid conflicts
model_df = self.model_measurements_df.drop(columns=['time'], errors='ignore')
summary_df = summary_df.merge(model_df, on='benchmark_id', how='left')
model_df = self.model_measurements_df.drop(columns=["time"], errors="ignore")
summary_df = summary_df.merge(model_df, on="benchmark_id", how="left")
# Calculate device measurement aggregates using pandas groupby
if len(self.device_measurements_df) > 0:
device_agg = self.device_measurements_df.groupby('benchmark_id').agg({
'cpu_util': ['mean', 'max', 'std', 'count'],
'mem_megabytes': ['mean', 'max', 'std'],
'gpu_util': ['mean', 'max', 'std'],
'gpu_mem_megabytes': ['mean', 'max', 'std']
}).round(3)
device_agg = (
self.device_measurements_df.groupby("benchmark_id")
.agg(
{
"cpu_util": ["mean", "max", "std", "count"],
"mem_megabytes": ["mean", "max", "std"],
"gpu_util": ["mean", "max", "std"],
"gpu_mem_megabytes": ["mean", "max", "std"],
}
)
.round(3)
)
# Flatten column names
device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
device_agg = device_agg.reset_index()
# Rename count column to be more descriptive
if 'cpu_util_count' in device_agg.columns:
device_agg = device_agg.rename(columns={'cpu_util_count': 'device_measurement_count'})
if "cpu_util_count" in device_agg.columns:
device_agg = device_agg.rename(columns={"cpu_util_count": "device_measurement_count"})
# Merge with summary
summary_df = summary_df.merge(device_agg, on='benchmark_id', how='left')
summary_df = summary_df.merge(device_agg, on="benchmark_id", how="left")
# Export the comprehensive summary
summary_df.to_csv(summary_file, index=False)
self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
@ -312,23 +346,18 @@ def parse_arguments() -> tuple[str, str, str, str, bool, str]:
type=str,
help="The commit message associated with the commit, truncated to 70 characters.",
)
parser.add_argument(
"--csv",
action="store_true",
default=False,
help="Enable CSV output files generation."
)
parser.add_argument("--csv", action="store_true", default=False, help="Enable CSV output files generation.")
parser.add_argument(
"--csv-output-dir",
type=str,
default="benchmark_results",
help="Directory for CSV output files (default: benchmark_results)."
help="Directory for CSV output files (default: benchmark_results).",
)
args = parser.parse_args()
# CSV is disabled by default, only enabled when --csv is used
generate_csv = args.csv
@ -353,9 +382,10 @@ def create_database_connection():
if not PSYCOPG2_AVAILABLE:
logger.warning("psycopg2 not available - running in CSV-only mode")
return None
try:
import psycopg2
conn = psycopg2.connect("dbname=metrics")
logger.info("Successfully connected to database")
return conn
@ -364,27 +394,28 @@ def create_database_connection():
return None
def create_global_metrics_recorder(repository: str, branch: str, commit_id: str, commit_msg: str,
generate_csv: bool = False) -> MetricsRecorder:
def create_global_metrics_recorder(
repository: str, branch: str, commit_id: str, commit_msg: str, generate_csv: bool = False
) -> MetricsRecorder:
"""
Create a global metrics recorder that will be used across all benchmarks.
"""
connection = create_database_connection()
recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)
# Log the storage mode
storage_modes = []
if connection is not None:
storage_modes.append("database")
if generate_csv:
storage_modes.append("CSV")
if not storage_modes:
logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
logger.warning("Use --csv flag to enable CSV output when database is unavailable")
else:
logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
return recorder
@ -393,16 +424,16 @@ if __name__ == "__main__":
benches_folder_path = os.path.join(benchmarks_folder_path, "benches")
repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()
# Create a global metrics recorder
global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)
successful_benchmarks = 0
failed_benchmarks = 0
# Automatically discover all benchmark modules in benches/ folder
benchmark_modules = []
if os.path.exists(benches_folder_path):
logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
for entry in os.scandir(benches_folder_path):
@ -410,12 +441,12 @@ if __name__ == "__main__":
continue
if entry.name.startswith("__"): # Skip __init__.py, __pycache__, etc.
continue
# Check if the file has a run_benchmark function
try:
logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
module = import_from_path(entry.name.split(".")[0], entry.path)
if hasattr(module, 'run_benchmark'):
if hasattr(module, "run_benchmark"):
benchmark_modules.append(entry.name)
logger.debug(f"discovered benchmark: {entry.name}")
else:
@ -436,16 +467,18 @@ if __name__ == "__main__":
logger.debug(f"loading: {module_name}")
module = import_from_path(module_name.split(".")[0], module_path)
logger.info(f"running benchmarks in: {module_name}")
# Check if the module has an updated run_benchmark function that accepts metrics_recorder
try:
# Try the new signature first
module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
except TypeError:
# Fall back to the old signature for backward compatibility
logger.warning(f"Module {module_name} using old run_benchmark signature - database connection will be created per module")
logger.warning(
f"Module {module_name} using old run_benchmark signature - database connection will be created per module"
)
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
successful_benchmarks += 1
except ImportModuleException as e:
logger.error(e)
@ -461,7 +494,7 @@ if __name__ == "__main__":
logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
else:
logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
except Exception as e:
logger.error(f"Failed to export CSV results: {e}")

View File

@ -3,7 +3,11 @@ import subprocess
def main(config_dir, config_name, args):
subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args)
subprocess.run(
["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"]
+ ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"]
+ args
)
if __name__ == "__main__":

View File

@ -2,5 +2,5 @@ gpustat==1.1.1
psutil==6.0.0
psycopg2==2.9.9
torch>=2.4.0
hf_transfer
hf_xet
pandas>=1.5.0

benchmark_v2/.gitignore
View File

@ -0,0 +1,2 @@
benchmark_results/
benchmark_results_profiles/

benchmark_v2/README.md
View File

@ -0,0 +1,138 @@
# Benchmarking v2
A comprehensive benchmarking framework for transformer models that supports multiple execution modes (eager, compiled, kernelized), detailed performance metrics collection, and structured output format.
## Quick Start
### Running All Benchmarks
```bash
# Run all benchmarks with default settings
python run_benchmarks.py
# Specify output directory
python run_benchmarks.py --output-dir my_results
# Run with custom parameters
python run_benchmarks.py \
--warmup-iterations 5 \
--measurement-iterations 10 \
--num-tokens-to-generate 200
```
### Uploading Results to HuggingFace Dataset
You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis:
```bash
# Upload to a public dataset with auto-generated run ID
python run_benchmarks.py --upload-to-hub username/benchmark-results
# Upload with a custom run ID for easy identification
python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
# Upload with custom HuggingFace token (if not set in environment)
python run_benchmarks.py --upload-to-hub username/benchmark-results --token hf_your_token_here
```
**Dataset Directory Structure:**
```
dataset_name/
├── 2025-01-15/
│ ├── runs/ # Non-scheduled runs (manual, PR, etc.)
│ │ └── 123-1245151651/ # GitHub run number and ID
│ │ └── benchmark_results/
│ │ ├── benchmark_summary_20250115_143022.json
│ │ └── model-name/
│ │ └── model-name_benchmark_20250115_143022.json
│ └── benchmark_results_abc123de/ # Scheduled runs (daily CI)
│ ├── benchmark_summary_20250115_143022.json
│ └── model-name/
│ └── model-name_benchmark_20250115_143022.json
└── 2025-01-16/
└── ...
```
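The upload itself is handled by `run_benchmarks.py`; purely as a sketch of what producing such a layout by hand could look like with `huggingface_hub` (the local folder, run ID, and dataset name below are made up), one might write:
```python
from datetime import date

from huggingface_hub import HfApi

api = HfApi()  # picks up HF_TOKEN from the environment if it is set
api.upload_folder(
    folder_path="benchmark_results",  # hypothetical local results directory
    path_in_repo=f"{date.today().isoformat()}/runs/123-1245151651/benchmark_results",
    repo_id="username/benchmark-results",
    repo_type="dataset",
)
```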
**Authentication for Uploads:**
For uploading results, you need a HuggingFace token with write permissions to the target dataset. You can provide the token in several ways (in order of precedence):
1. Command line: `--token hf_your_token_here`
3. Environment variable: `HF_TOKEN`
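For example, with the environment variable:
```bash
# Dataset name is illustrative; the token is read from the environment
export HF_TOKEN=hf_your_token_here
python run_benchmarks.py --upload-to-hub username/benchmark-results
```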
### Running Specific Benchmarks
```bash
# Include only specific benchmarks
python run_benchmarks.py --include llama
# Exclude specific benchmarks
python run_benchmarks.py --exclude old_benchmark
```
## Output Format
Results are saved as JSON files with the following structure:
```json
{
"model_name": "llama_2_7b",
"benchmark_scenarios": [
{
"scenario_name": "eager_variant",
"metadata": {
"timestamp": "2025-01-XX...",
"commit_id": "abc123...",
"hardware_info": {
"gpu_name": "NVIDIA A100",
"gpu_memory_total": 40960,
"cpu_count": 64
},
"config": {
"variant": "eager",
"warmup_iterations": 3,
"measurement_iterations": 5
}
},
"measurements": {
"latency": {
"mean": 2.45,
"median": 2.43,
"std": 0.12,
"min": 2.31,
"max": 2.67,
"p95": 2.61,
"p99": 2.65
},
"time_to_first_token": {
"mean": 0.15,
"std": 0.02
},
"tokens_per_second": {
"mean": 87.3,
"unit": "tokens/sec"
}
},
"gpu_metrics": {
"gpu_utilization_mean": 85.2,
"gpu_memory_used_mean": 12450
}
}
]
}
```
### Debug Mode
```bash
python run_benchmarks.py --log-level DEBUG
```
## Contributing
To add new benchmarks:
1. Create a new file in `benches/`
2. Implement the `ModelBenchmark` interface
3. Add a runner function (`run_<benchmark_name>` or `run_benchmark`)
4. run_benchmarks.py
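Item 4 above is truncated in this view; assuming `run_benchmarks.py` discovers modules in `benches/` through their runner function (the v1 `benchmark.py` shown earlier in this compare does exactly that), a minimal new benchmark module could look like the following sketch. The import paths, runner signature, and file name are assumptions, not part of the diff:
```python
# benches/my_model.py -- illustrative sketch only
import logging

from framework.benchmark_config import BenchmarkConfig  # assumed package path
from framework.benchmark_runner import BenchmarkRunner  # assumed package path


def run_benchmark(logger: logging.Logger, output_dir: str | None = None) -> dict:
    """Runner function that run_benchmarks.py would pick up (signature assumed)."""
    config = BenchmarkConfig(
        warmup_iterations=3,
        measurement_iterations=5,
        num_tokens_to_generate=128,
        attn_implementation="eager",
    )
    runner = BenchmarkRunner(logger=logger, output_dir=output_dir)
    return runner.run_benchmarks("meta-llama/Llama-2-7b-hf", [config])
```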

View File

@ -0,0 +1,215 @@
import hashlib
import json
import logging
from typing import Any
KERNELIZATION_AVAILABLE = False
try:
from kernels import Mode, kernelize # noqa: F401
KERNELIZATION_AVAILABLE = True
except ImportError:
pass
logger = logging.getLogger(__name__)
class BenchmarkConfig:
"""Configuration for a single benchmark scenario."""
def __init__(
self,
warmup_iterations: int = 5,
measurement_iterations: int = 20,
gpu_monitoring: bool = False, # False by default because it slows down the benchmark by a lot
batch_size: int = 1,
sequence_length: int = 128,
num_tokens_to_generate: int = 128,
attn_implementation: str = "eager",
sdpa_backend: str | None = None,
compile_mode: str | None = None,
compile_options: dict[str, Any] | None = None,
kernelize: bool = False,
name: str | None = None,
skip_validity_check: bool = False,
) -> None:
# Benchmark parameters
self.warmup_iterations = warmup_iterations
self.measurement_iterations = measurement_iterations
self.gpu_monitoring = gpu_monitoring
# Input parameters
self.batch_size = batch_size
self.sequence_length = sequence_length
self.num_tokens_to_generate = num_tokens_to_generate
# Generation parameters
self.attn_implementation = attn_implementation
self.sdpa_backend = sdpa_backend
# Optimization parameters
self.compile_mode = compile_mode
self.compile_options = compile_options if compile_options is not None else {}
self.kernelize = kernelize
# Constant parameters
self.dtype = "torch.bfloat16"
self.device = "cuda"
self.check_validity(skip_validity_check)
self.name = name if name is not None else self.infer_name()
def check_validity(self, skip_validity_check: bool = False) -> None:
if skip_validity_check:
return
# Flash attention does not support compile mode, so we turn it off # FIXME: it would be better to support it
is_fa = self.attn_implementation == "flash_attention_2"
is_fa |= self.attn_implementation == "sdpa" and self.sdpa_backend == "flash_attention"
if is_fa:
logger.warning("Flash attention does not support compile mode. Turning off compile mode.")
self.compile_mode = None
@property
def hash(self) -> str:
return hashlib.sha256(json.dumps(self.to_dict()).encode()).hexdigest()
def infer_name(self, compact: bool = True) -> str:
"""Infer a human-readable name for the benchmark config, either compact or verbose."""
if compact:
iter_str = f"w{self.warmup_iterations}_i{self.measurement_iterations}"
gpu_monitor_str = "monitored" if self.gpu_monitoring else "unmonitored"
dimensions_str = f"b{self.batch_size}_s{self.sequence_length}_n{self.num_tokens_to_generate}"
attn_code = self.attn_implementation
attn_code += f"_{self.sdpa_backend}" if self.attn_implementation == "sdpa" else ""
compile_str = f"compiled_{self.compile_mode}" if self.compile_mode is not None else "uncompiled"
kernelize_str = "kernelized" if self.kernelize else "unkernelized"
sep = "-"
else:
iter_str = f"{self.warmup_iterations} warmup, {self.measurement_iterations} iterations"
gpu_monitor_str = ("with" if self.gpu_monitoring else "no") + " GPU monitoring"
dimensions_str = f"batch size {self.batch_size}, sequence length {self.sequence_length}, {self.num_tokens_to_generate} generated tokens"
attn_code = f"{self.attn_implementation} attention"
attn_code += f" with {self.sdpa_backend} backend" if self.attn_implementation == "sdpa" else ""
compile_str = "compiled" if self.compile_mode is not None else "not compiled"
kernelize_str = "kernelized" if self.kernelize else "not kernelized"
sep = ", "
return sep.join([iter_str, gpu_monitor_str, dimensions_str, attn_code, compile_str, kernelize_str])
def to_dict(self) -> dict[str, Any]:
return {
"name": self.name,
"warmup_iterations": self.warmup_iterations,
"measurement_iterations": self.measurement_iterations,
"gpu_monitoring": self.gpu_monitoring,
"batch_size": self.batch_size,
"sequence_length": self.sequence_length,
"num_tokens_to_generate": self.num_tokens_to_generate,
"attn_implementation": self.attn_implementation,
"sdpa_backend": self.sdpa_backend,
"compile_mode": self.compile_mode,
"compile_options": self.compile_options | {}, # to avoid inplace modification of the original dict
"kernelize": self.kernelize,
}
@classmethod
def from_dict(cls, data: dict[str, Any], skip_validity_check: bool = False) -> "BenchmarkConfig":
return cls(
warmup_iterations=data.get("warmup_iterations", 5),
measurement_iterations=data.get("measurement_iterations", 20),
gpu_monitoring=data.get("gpu_monitoring", False),
batch_size=data.get("batch_size", 1),
sequence_length=data.get("sequence_length", 128),
num_tokens_to_generate=data.get("num_tokens_to_generate", 128),
attn_implementation=data.get("attn_implementation", "eager"),
sdpa_backend=data.get("sdpa_backend"),
compile_mode=data.get("compile_mode"),
compile_options=data.get("compile_options"),
kernelize=data.get("kernelize", False),
name=data.get("name"),
skip_validity_check=skip_validity_check,
)
def cross_generate_configs(
attn_impl_and_sdpa_backend: list[tuple[str, str | None]],
compiled_mode: list[str | None],
kernelized: list[bool],
warmup_iterations: int = 5,
measurement_iterations: int = 20,
batch_size: int = 1,
sequence_length: int = 128,
num_tokens_to_generate: int = 128,
gpu_monitoring: bool = False, # this slows down the benchmark by a lot so we disable it by default
) -> list[BenchmarkConfig]:
# Create kwargs common to all configs
kwargs = {
"warmup_iterations": warmup_iterations,
"measurement_iterations": measurement_iterations,
"batch_size": batch_size,
"sequence_length": sequence_length,
"num_tokens_to_generate": num_tokens_to_generate,
"gpu_monitoring": gpu_monitoring,
}
# Cross-generate all combinations of attn_implementation, compiled_mode, and kernelized
configs = []
for attn_implementation, sdpa_backend in list(dict.fromkeys(attn_impl_and_sdpa_backend)):
for cm in list(dict.fromkeys(compiled_mode)):
for kernelize_on in list(dict.fromkeys(kernelized)):
config = BenchmarkConfig(
attn_implementation=attn_implementation,
sdpa_backend=sdpa_backend,
compile_mode=cm,
kernelize=kernelize_on,
**kwargs,
)
configs.append(config)
return configs
def generate_all_configs(
warmup_iterations: int = 5,
measurement_iterations: int = 20,
batch_size: int = 1,
sequence_length: int = 128,
num_tokens_to_generate: int = 128,
gpu_monitoring: bool = False,
) -> list[BenchmarkConfig]:
all_attn_implementations = [
("flash_attention_2", None),
("eager", None),
("sdpa", "math"),
("sdpa", "flash_attention"),
("flex_attention", None),
]
return cross_generate_configs(
attn_impl_and_sdpa_backend=all_attn_implementations,
compiled_mode=[None, "default", "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"],
kernelized=[False, KERNELIZATION_AVAILABLE],
warmup_iterations=warmup_iterations,
measurement_iterations=measurement_iterations,
batch_size=batch_size,
sequence_length=sequence_length,
num_tokens_to_generate=num_tokens_to_generate,
gpu_monitoring=gpu_monitoring,
)
def generate_main_configs(
warmup_iterations: int = 5,
measurement_iterations: int = 20,
batch_size: int = 1,
sequence_length: int = 128,
num_tokens_to_generate: int = 128,
gpu_monitoring: bool = False,
) -> list[BenchmarkConfig]:
# Create kwargs common to all configs
kwargs = {
"warmup_iterations": warmup_iterations,
"measurement_iterations": measurement_iterations,
"batch_size": batch_size,
"sequence_length": sequence_length,
"num_tokens_to_generate": num_tokens_to_generate,
"gpu_monitoring": gpu_monitoring,
}
return [ # TODO: test max-autotune instead of default
BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", **kwargs),
BenchmarkConfig(attn_implementation="eager", compile_mode="default", **kwargs),
BenchmarkConfig(attn_implementation="flash_attention_2", **kwargs),
]

View File

@ -0,0 +1,389 @@
import gc
import json
import logging
import os
import pathlib
import re
import time
from contextlib import nullcontext
from datetime import datetime
from queue import Queue
from typing import Any
import torch
from tqdm import trange
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
CompileConfig,
GenerationConfig,
GenerationMixin,
)
from transformers.generation.streamers import BaseStreamer
from .benchmark_config import BenchmarkConfig
from .data_classes import BenchmarkMetadata, BenchmarkResult, GPURawMetrics, pretty_print_dict
from .hardware_metrics import GPUMonitor
try:
from kernels import Mode, kernelize # noqa: F401
except ImportError:
kernelize = None
Mode = None
DEFAULT_PROMPT = "\n".join([
"The French Revolution was a period of political and societal change in France that began with the Estates General of 1789 and ended with the Coup of 18 Brumaire on 9 November 1799.",
"Many of the revolution's ideas are considered fundamental principles of liberal democracy, and its values remain central to modern French political discourse.",
"It was caused by a combination of social, political, and economic factors which the existing regime proved unable to manage.",
"Financial crisis and widespread social distress led to the convocation of the Estates General in May 1789, its first meeting since 1614.",
"The representatives of the Third Estate broke away and re-constituted themselves as a National Assembly in June.",
"The Storming of the Bastille in Paris on 14 July led to a series of radical measures by the Assembly, including the abolition of feudalism, state control over the Catholic Church in France, and issuing the Declaration of the Rights of Man and of the Citizen.",
"The next three years were dominated by a struggle for political control.",
"King Louis XVI's attempted flight to Varennes in June 1791 further discredited the monarchy, and military defeats after the outbreak of the French Revolutionary Wars in April 1792 led to the insurrection of 10 August 1792.",
"As a result, the monarchy was replaced by the French First Republic in September, followed by the execution of Louis XVI himself in January 1793.",
"After another revolt in June 1793, the constitution was suspended, and political power passed from the National Convention to the Committee of Public Safety, dominated by radical Jacobins led by Maximilien Robespierre.",
"About 16,000 people were sentenced by the Revolutionary Tribunal and executed in the Reign of Terror, which ended in July 1794 with the Thermidorian Reaction.",
"Weakened by external threats and internal opposition, the Committee of Public Safety was replaced in November 1795 by the Directory.",
"Its instability ended in the coup of 18 Brumaire and the establishment of the Consulate, with Napoleon Bonaparte as First Consul.",
]) # fmt: skip
def compact_json_numeric_arrays(data: dict):
# Match arrays that contain only numbers (ints/floats), whitespace, commas, and newlines
pattern = r"\[\s*\n\s*((?:\d+(?:\.\d+)?\s*,\s*)*\d+(?:\.\d+)?)\s*\n\s*\]"
def replace_numeric_array(match):
# Get the array content
content = match.group(1)
# Remove extra whitespace but keep commas
compact_content = re.sub(r"\s+", " ", content).strip()
return f"[{compact_content}]"
return re.sub(pattern, replace_numeric_array, json.dumps(data, indent=4, default=str), flags=re.DOTALL)
def get_git_revision() -> str:
base_path = pathlib.Path(__file__).parent.parent.parent
git_dir = base_path / ".git"
with (git_dir / "HEAD").open("r") as head:
ref = head.readline().split(" ")[-1].strip()
with (git_dir / ref).open("r") as git_hash:
return git_hash.readline().strip()
def get_sdpa_backend(backend_name: str | None) -> torch.nn.attention.SDPBackend | None:
"""Get the SDPA backend enum from string name."""
if backend_name is None:
return None
try:
backend_map = {
"math": torch.nn.attention.SDPBackend.MATH,
"flash_attention": torch.nn.attention.SDPBackend.FLASH_ATTENTION,
"efficient_attention": torch.nn.attention.SDPBackend.EFFICIENT_ATTENTION,
"cudnn_attention": torch.nn.attention.SDPBackend.CUDNN_ATTENTION,
}
return backend_map.get(backend_name.lower())
except AttributeError:
# torch.nn.attention.SDPBackend not available in older torch versions
return None
def flush_memory():
"""Flush GPU memory and run garbage collection."""
gc.collect()
# Dynamo resets
torch._dynamo.reset()
torch._dynamo.reset_code_caches()
if hasattr(torch._inductor, "codecache"):
# Clear FX graph cache
if hasattr(torch._inductor.codecache, "FxGraphCache"):
torch._inductor.codecache.FxGraphCache.clear()
# Clear PyCodeCache
if hasattr(torch._inductor.codecache, "PyCodeCache"):
torch._inductor.codecache.PyCodeCache.cache_clear()
# Clear TritonFuture cache (for async compilation)
if hasattr(torch._inductor.codecache, "TritonFuture"):
if hasattr(torch._inductor.codecache.TritonFuture, "_compile_cache"):
torch._inductor.codecache.TritonFuture._compile_cache.clear()
# Clear CUDA cache
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()
gc.collect()
class BenchmarkStreamer(BaseStreamer):
def __init__(self, **kwargs) -> None:
self.timestamps = []
self.text_queue = Queue()
def put(self, value):
"""Receives tokens and logs the timestamp of the generation."""
self.timestamps.append(time.perf_counter())
def end(self):
self.timestamps.append(time.perf_counter())
def __iter__(self):
return self
def __next__(self):
value = self.text_queue.get(timeout=self.timeout)
if value == self.stop_signal:
raise StopIteration()
else:
return value
class BenchmarkRunner:
"""Main benchmark runner that coordinates benchmark execution."""
def __init__(self, logger: logging.Logger, output_dir: str | None = None, commit_id: str | None = None) -> None:
# Those stay constant for the whole run
self.logger = logger
if output_dir is None:
output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "benchmark_results")
self.output_dir = output_dir
self.commit_id = get_git_revision() if commit_id is None else commit_id
os.makedirs(self.output_dir, exist_ok=True)
self.profile_dir = None
# Attributes that are reset for each model
self._setup_for = ""
# Attributes that are reset for each run
self.model: GenerationMixin | None = None
def cleanup(self) -> None:
del self.model
self.model = None
flush_memory()
def setup_one_run(self, model_id: str, config: BenchmarkConfig) -> None:
# Some attributes only need to be set once per model
if self._setup_for != model_id:
self.tokenizer = AutoTokenizer.from_pretrained(model_id)
# We set the EOS token to the padding token for open-ended generation
self.tokenizer.eos_token = self.tokenizer.pad_token
self._setup_for = model_id
# Prepare inputs
self.inputs = self.tokenizer(
[DEFAULT_PROMPT for _ in range(config.batch_size)],
return_tensors="pt",
max_length=config.sequence_length,
truncation=True,
return_attention_mask=True,
).to(config.device)
self.inputs["use_cache"] = True
# Prepare generation config
gen_config = GenerationConfig(
do_sample=False, top_p=1.0, temperature=1.0, max_new_tokens=config.num_tokens_to_generate
)
# Prepare compile config
if config.compile_mode is not None:
gen_config.compile_config = CompileConfig(mode=config.compile_mode, options=config.compile_options)
gen_config.cache_implementation = "static"
# Load model
self.logger.debug(f"Loading model {model_id} on device {config.device}...")
dtype = getattr(torch, config.dtype.removeprefix("torch."))
self.model = AutoModelForCausalLM.from_pretrained(
model_id, dtype=dtype, attn_implementation=config.attn_implementation, generation_config=gen_config
)
self.model = self.model.eval().to(config.device)
# Kernelize the model if needed
if config.kernelize:
self.model = kernelize(self.model, mode=Mode.INFERENCE)
def run_one_benchmark(self, model_id: str, config: BenchmarkConfig, num_tokens_to_profile: int = 0) -> dict[str, Any] | None:
sdpa_ctx = nullcontext()
if config.attn_implementation == "sdpa":
sdpa_backend = get_sdpa_backend(config.sdpa_backend)
sdpa_ctx = torch.nn.attention.sdpa_kernel(sdpa_backend)
with sdpa_ctx, torch.no_grad():
self.logger.info(f"Running benchmark scenario: {config.name}")
# Quick validation: try one measurement first to see if this scenario works
flush_memory()
e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics = self.time_generate(
max_new_tokens=1, gpu_monitor=None
)
if e2e_latency < 0:
self.logger.warning(f"Skipping config {config.name}: {e2e_latency = } (no GPU monitoring)")
return None
# Warmup runs
self.logger.info(f"Warming up with {config.warmup_iterations} iterations...")
for _ in trange(config.warmup_iterations):
_ = self.time_generate(max_new_tokens=config.num_tokens_to_generate)
self.logger.info("Warmup over.")
# Measurement runs
result = BenchmarkResult()
self.logger.info(f"Benchmarking with {config.measurement_iterations} iterations.")
for _ in trange(config.measurement_iterations):
e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics = self.time_generate(
max_new_tokens=config.num_tokens_to_generate,
gpu_monitor=(GPUMonitor(logger=self.logger) if config.gpu_monitoring else None),
)
result.accumulate(e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics)
self.logger.info("Benchmarking done. Cleaning up.")
# Profile if needed
if num_tokens_to_profile > 0:
self.profile_generate(num_tokens_to_profile, config.name)
return {
"metadata": BenchmarkMetadata(model_id=model_id, commit_id=self.commit_id),
"measurements": result,
"config": config,
}
def time_generate(
self,
max_new_tokens: int,
gpu_monitor: GPUMonitor | None = None,
) -> tuple[float, list[float], str, GPURawMetrics | None]:
"""Time the latency of a call to model.generate() with the given (inputs) and (max_new_tokens)."""
# Prepare gpu monitoring if needed
if gpu_monitor is not None:
gpu_monitor.start()
# Prepare streamer
streamer = BenchmarkStreamer()
# Generate and time
wall_time_0 = time.perf_counter()
outputs = self.model.generate(
**self.inputs,
max_new_tokens=max_new_tokens,
streamer=streamer,
)
wall_time_1 = time.perf_counter()
# Stop gpu monitoring if needed
gpu_metrics = gpu_monitor.stop_and_collect() if gpu_monitor is not None else None
# Check if generation had the right number of tokens
input_tokens = self.inputs["input_ids"].size(-1)
batch_size, output_tokens = outputs.shape
new_tokens = output_tokens - input_tokens
if new_tokens != max_new_tokens:
raise RuntimeError(f"Generated {new_tokens} tokens, expected {max_new_tokens}")
# Decode outputs
decoded_output = self.tokenizer.decode(outputs[0, input_tokens:], skip_special_tokens=True)
shape_and_decoded_output = f"{tuple(outputs.shape)} | {decoded_output}"
# Compute intermediate quantities
e2e_latency = wall_time_1 - wall_time_0
token_generation_times = [t - wall_time_0 for t in streamer.timestamps[1:]]
return e2e_latency, token_generation_times, shape_and_decoded_output, gpu_metrics
def profile_generate(self, num_tokens_to_profile: int, config_name: str) -> None:
"""Profile the latency of a call to model.generate() with the given (inputs) and (max_new_tokens)."""
profiler = torch.profiler.profile(
activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA],
record_shapes=True,
)
with profiler as prof:
_ = self.model.generate(
**self.inputs,
max_new_tokens=num_tokens_to_profile,
)
if self.profile_dir is None:
self.profile_dir = self.output_dir + "_profiles"
os.makedirs(self.profile_dir, exist_ok=True)
prof.export_chrome_trace(f"{self.profile_dir}/{config_name}.json")
def run_benchmarks(
self,
model_id: str,
benchmark_configs: list[BenchmarkConfig],
num_tokens_to_profile: int = 0,
pretty_print_summary: bool = True,
) -> dict[str, Any]:
all_results = {}
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
start_time = time.perf_counter()
n_configs = len(benchmark_configs)
for i, config in enumerate(benchmark_configs):
# Handle SDPA backend if not determined by the config (needs to be done before skipping duplicates)
if config.attn_implementation == "sdpa" and config.sdpa_backend is None:
default_backend = "flash_attention" # FIXME: torch has a _cur_sdpa_kernel_backends but it fails
self.logger.warning(f"No SDPA backend provided, using {default_backend} instead.")
config.sdpa_backend = default_backend
# Skip if already run
if config.hash in all_results:
self.logger.info(f"Skipping duplicate config {config.name} for model {model_id} ({i + 1}/{n_configs})")
continue
# Otherwise, run the benchmark
self.setup_one_run(model_id, config)
self.logger.info(
f"Running benchmark of model {model_id} with scenario: {config.name} ({i + 1}/{n_configs})"
)
# Launch benchmark in a try/except block to avoid stopping the whole run if one benchmark fails
try:
results = self.run_one_benchmark(model_id, config, num_tokens_to_profile)
if results is not None:
all_results[config.hash] = results
except Exception as e:
self.logger.error(f"Error running with scenario: {config.name}:\n{repr(e)}")
# Cleanup model and save results
self.cleanup()
self.save_results(model_id, all_results, timestamp=timestamp)
if pretty_print_summary:
print()
print("=" * 100)
print(f"Finished benchmarks in {time.perf_counter() - start_time:.2f} seconds")
print(f"Total number of benchmarks: {len(all_results)}")
if len(all_results) > 0:
print("First run metadata:")
first_key = list(all_results.keys())[0]
first_metadata = all_results[first_key]["metadata"].to_dict()
hardware_info = first_metadata.pop("hardware_info")
pretty_print_dict(first_metadata | hardware_info, tabs=1)
for result in all_results.values():
print("=" * 100)
print(f"Config: {result['config'].infer_name(compact=False)}\n")
result["measurements"].pprint(batch_size=result["config"].batch_size, tabs=1)
print("=" * 100)
return all_results
def save_results(self, model_name: str, results: dict, timestamp: str = "") -> str:
"""Save benchmark results to JSON file."""
# Create model-specific subdirectory
model_name = model_name.replace("/", "_")
model_dir = os.path.join(self.output_dir, model_name)
os.makedirs(model_dir, exist_ok=True)
# Create filename with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") if not timestamp else timestamp
filename = f"{model_name}_benchmark_{timestamp}.json"
filepath = os.path.join(model_dir, filename)
# Convert results to dict
converted_results = {}
for cfg_hash in results.keys():
converted_results[cfg_hash] = {
"metadata": results[cfg_hash]["metadata"].to_dict(),
"measurements": results[cfg_hash]["measurements"].to_dict(),
"config": results[cfg_hash]["config"].to_dict(),
}
# Save to JSON file
with open(filepath, "w") as f:
f.write(compact_json_numeric_arrays(converted_results))
self.logger.info(f"Results saved to {filepath}")
return filepath
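The `time_generate` method above relies on a `BenchmarkStreamer` whose `timestamps` list records when each generation step happened; its definition is not part of this excerpt. Below is a minimal sketch of what such a streamer could look like, assuming the `BaseStreamer` interface from `transformers`; the class name and details are illustrative, not the repository's actual implementation.

```py
import time

from transformers.generation.streamers import BaseStreamer


class TimestampStreamer(BaseStreamer):
    """Hypothetical streamer that records a wall-clock timestamp per generation step."""

    def __init__(self) -> None:
        self.timestamps: list[float] = []

    def put(self, value) -> None:
        # Called once with the prompt tokens, then once per newly generated token batch,
        # which is why time_generate() skips the first entry with `timestamps[1:]`.
        self.timestamps.append(time.perf_counter())

    def end(self) -> None:
        # Generation finished: record a final timestamp.
        self.timestamps.append(time.perf_counter())
```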

View File

@ -0,0 +1,160 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Any
import numpy as np
from .hardware_metrics import GPURawMetrics, HardwareInfo
def compute_basic_statistics(measurements: list[float]) -> dict[str, float]:
return {
"avg": np.mean(measurements),
"std": np.std(measurements),
"min": np.min(measurements),
"med": np.median(measurements),
"max": np.max(measurements),
"p95": np.percentile(measurements, 95),
}
def add_unit_to_duration(stats: dict[str, float]) -> dict[str, str]:
for key in list(stats.keys()):
value = stats[key]
if value > 3600:
stats[key] = f"{(value / 3600):.2f}hr"
elif value > 60:
stats[key] = f"{(value / 60):.2f}min"
elif value > 1:
stats[key] = f"{value:.2f}s"
elif value > 1e-3:
stats[key] = f"{(value * 1e3):.2f}ms"
elif value > 1e-6:
stats[key] = f"{(value * 1e6):.2f}us"
else:
stats[key] = f"{(value * 1e9):.2f}ns"
return stats
def equalize_lengths_and_collate(stats: list[dict[str, str]]) -> list[str]:
keys = ["avg", "std", "min", "med", "max", "p95"]
for key in keys:
max_length = max(len(stat[key]) for stat in stats)
for stat in stats:
stat[key] = stat[key].ljust(max_length, " ")
return [" ".join([f"{key}={stat[key]}" for key in keys]) for stat in stats]
def pretty_print_dict(data: dict[str, Any], tabs: int = 0) -> None:
max_key_length = max([len(key) for key in data.keys()])
for key, value in data.items():
tabs_str = " " * tabs
padded_key = key.ljust(max_key_length + 1, ".")
print(f"{tabs_str}{padded_key}: {value}")
@dataclass
class BenchmarkMetadata:
"""Metadata collected for each benchmark run."""
model_id: str
timestamp: str
commit_id: str
hardware_info: HardwareInfo
def __init__(self, model_id: str, commit_id: str):
self.model_id = model_id
self.timestamp = datetime.utcnow().isoformat()
self.commit_id = commit_id
self.hardware_info = HardwareInfo()
def to_dict(self) -> dict[str, Any]:
return {
"timestamp": self.timestamp,
"commit_id": self.commit_id,
"hardware_info": self.hardware_info.to_dict(),
}
class BenchmarkResult:
"""Result from a series of benchmark runs."""
def __init__(self) -> None:
self.e2e_latency = []
self.token_generation_times = [] # time at which each token was generated (relative to start of the generation)
self.shape_and_decoded_outputs = []
self.gpu_metrics = []
def accumulate(
self,
e2e_latency: float,
token_generation_times: list[float],
shape_and_decoded_output: str,
gpu_metrics: GPURawMetrics | None,
) -> None:
self.e2e_latency.append(e2e_latency)
self.token_generation_times.append(token_generation_times)
self.shape_and_decoded_outputs.append(shape_and_decoded_output)
self.gpu_metrics.append(gpu_metrics)
def to_dict(self) -> dict[str, Any]:
# Save GPU metrics as None if it contains only None values
if all(gm is None for gm in self.gpu_metrics):
gpu_metrics = None
else:
gpu_metrics = [gm.to_dict() for gm in self.gpu_metrics]
return {
"e2e_latency": self.e2e_latency,
"token_generation_times": self.token_generation_times,
"shape_and_decoded_outputs": self.shape_and_decoded_outputs,
"gpu_metrics": gpu_metrics,
}
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "BenchmarkResult":
# Handle GPU metrics, which is saved as None if it contains only None values
if data["gpu_metrics"] is None:
gpu_metrics = [None for _ in range(len(data["e2e_latency"]))]
else:
gpu_metrics = [GPURawMetrics.from_dict(gm) for gm in data["gpu_metrics"]]
# Create a new instance and accumulate the data
new_instance = cls()
for i in range(len(data["e2e_latency"])):
new_instance.accumulate(
e2e_latency=data["e2e_latency"][i],
token_generation_times=data["token_generation_times"][i],
shape_and_decoded_output=data["shape_and_decoded_outputs"][i],
gpu_metrics=gpu_metrics[i],
)
return new_instance
def get_measured_ttft(self) -> list[float]:
return [dt[0] for dt in self.token_generation_times if len(dt) > 0]
def get_measured_itl(self) -> list[float]:
return [(dt[-1] - dt[0]) / (len(dt) - 1) for dt in self.token_generation_times if len(dt) > 1]
def get_throughput(self, batch_size: int) -> list[float]:
return [
batch_size * len(dt) / e2e_latency
for e2e_latency, dt in zip(self.e2e_latency, self.token_generation_times)
]
def pprint(self, batch_size: int = 0, tabs: int = 0) -> None:
stats_to_collate = [
add_unit_to_duration(compute_basic_statistics(self.e2e_latency)),
add_unit_to_duration(compute_basic_statistics(self.get_measured_ttft())),
add_unit_to_duration(compute_basic_statistics(self.get_measured_itl())),
]
if batch_size > 0:
throughput_stats = compute_basic_statistics(self.get_throughput(batch_size))
stats_to_collate.append({key: f"{value:.2f}tok/s" for key, value in throughput_stats.items()})
collated_stats = equalize_lengths_and_collate(stats_to_collate)
dict_to_pprint = {
"E2E Latency": collated_stats[0],
"Time to First Token": collated_stats[1],
"Inter-Token Latency": collated_stats[2],
}
if batch_size > 0:
dict_to_pprint["Throughput"] = collated_stats[3]
pretty_print_dict(dict_to_pprint, tabs=tabs)
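As a quick illustration of how the helpers above compose (the latency values below are made up for the example, not real measurements):

```py
# Illustrative only: placeholder numbers formatted with the helpers defined above.
latencies = [0.91, 1.02, 0.98, 1.10]  # end-to-end latencies in seconds
ttfts = [0.031, 0.029, 0.035, 0.030]  # times to first token in seconds

stats = [
    add_unit_to_duration(compute_basic_statistics(latencies)),
    add_unit_to_duration(compute_basic_statistics(ttfts)),
]
collated = equalize_lengths_and_collate(stats)
pretty_print_dict({"E2E Latency": collated[0], "Time to First Token": collated[1]}, tabs=1)
```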

View File

@ -0,0 +1,171 @@
import json
import logging
import subprocess
import sys
import threading
import time
from dataclasses import dataclass
from enum import Enum
from logging import Logger
import gpustat
import psutil
import torch
# Helpers and data classes to collect and hold the hardware information
def get_device_name_and_memory_total() -> tuple[str, float]:
"""Returns the name and memory total of GPU 0."""
device_name = torch.cuda.get_device_properties(0).name
device_memory_total = torch.cuda.get_device_properties(0).total_memory / 1024**3
return device_name, device_memory_total
class HardwareInfo:
"""A class to hold information about the hardware."""
def __init__(self) -> None:
# Retrieve GPU stats
try:
self.gpu_name, self.gpu_memory_total_gb = get_device_name_and_memory_total()
except Exception:
self.gpu_name, self.gpu_memory_total_gb = None, None
# Retrieve python, torch and CUDA version
self.python_version = f"{sys.version.split()[0]}"
self.torch_version = torch.__version__
if hasattr(torch, "cuda") and torch.cuda.is_available():
self.cuda_version = torch.version.cuda
else:
self.cuda_version = None
# Retrieve general hardware information
self.cpu_count = psutil.cpu_count()
self.memory_total_mb = int(psutil.virtual_memory().total / (1024 * 1024))
def to_dict(self) -> dict[str, None | int | float | str]:
return {
"gpu_name": self.gpu_name,
"gpu_memory_total_gb": self.gpu_memory_total_gb,
"python_version": self.python_version,
"torch_version": self.torch_version,
}
# Functions to get information about the GPU
def get_amd_gpu_stats() -> tuple[int, float]:
"""Returns the utilization and memory used of an AMD GPU, both in percent"""
rocm_smi_output = subprocess.check_output(["rocm-smi", "--json", "--showuse", "--showmeminfo", "VRAM"])
gpu_stats = json.loads(rocm_smi_output.decode("utf-8"))
gpu_stats = [
(card_id, stats["GPU use (%)"], stats["VRAM Total Used Memory (B)"]) for card_id, stats in gpu_stats.items()
]
gpu_stats.sort(key=lambda x: x[1], reverse=True)
return int(gpu_stats[0][1]), float(gpu_stats[0][2]) / 1024**3
def get_nvidia_gpu_stats() -> tuple[int, float]:
"""Returns the utilization and memory used of an NVIDIA GPU, both in percent"""
gpu_stats = gpustat.GPUStatCollection.new_query()
gpu_stats = gpu_stats[0]
return int(gpu_stats["utilization.gpu"]), float(gpu_stats["memory.used"]) / 1024**3
class GPUStatsCollector:
"""A class to get statistics about the GPU. It serves as a wrapper that holds the GPU total memory and its name,
which is used to call the right function to get the utilization and memory used."""
def __init__(self) -> None:
self.device_name, self.device_memory_total = get_device_name_and_memory_total()
# Monkey patch the get_utilization_and_memory_used method based on the GPU type
if "amd" in self.device_name.lower():
self.get_utilization_and_memory_used = get_amd_gpu_stats
elif "nvidia" in self.device_name.lower():
self.get_utilization_and_memory_used = get_nvidia_gpu_stats
else:
raise RuntimeError(f"Unsupported GPU: {self.device_name}")
def get_measurements(self) -> tuple[int, float]:
"""Get the utilization and memory used of the GPU, both in percent"""
raise NotImplementedError("This method is meant to be monkey patched during __init__")
# Simple data classes to hold the raw GPU metrics
class GPUMonitoringStatus(Enum):
"""Status of GPU monitoring."""
SUCCESS = "success"
FAILED = "failed"
NO_GPUS_AVAILABLE = "no_gpus_available"
NO_SAMPLES_COLLECTED = "no_samples_collected"
@dataclass
class GPURawMetrics:
"""Raw values for GPU utilization and memory used."""
utilization: list[float] # in percent
memory_used: list[float] # in GB
timestamps: list[float] # in seconds
timestamp_0: float # in seconds
monitoring_status: GPUMonitoringStatus
def to_dict(self) -> dict[str, None | int | float | str]:
return {
"utilization": self.utilization,
"memory_used": self.memory_used,
"timestamps": self.timestamps,
"timestamp_0": self.timestamp_0,
"monitoring_status": self.monitoring_status.value,
}
# Main class, used to monitor the GPU utilization during benchmark execution
class GPUMonitor:
"""Monitor GPU utilization during benchmark execution."""
def __init__(self, sample_interval_sec: float = 0.1, logger: Logger | None = None):
self.sample_interval_sec = sample_interval_sec
self.logger = logger if logger is not None else logging.getLogger(__name__)
self.num_available_gpus = torch.cuda.device_count()
if self.num_available_gpus == 0:
raise RuntimeError("No GPUs detected by torch.cuda.device_count().")
self.gpu_stats_getter = GPUStatsCollector()
def start(self):
"""Start monitoring GPU metrics."""
# Clear the stop event to enable monitoring
self.stop_event = threading.Event()
self.gpu_utilization = []
self.gpu_memory_used = []
self.timestamps = []
self.thread = threading.Thread(target=self._monitor_loop)
self.thread.start()
self.logger.debug("GPU monitoring started")
def stop_and_collect(self) -> GPURawMetrics:
"""Stop monitoring and return collected metrics."""
self.stop_event.set()
self.thread.join()
if self.gpu_utilization:
timestamp_0 = self.timestamps[0]
metrics = GPURawMetrics(
utilization=self.gpu_utilization,
memory_used=self.gpu_memory_used,
timestamps=[t - timestamp_0 for t in self.timestamps],
timestamp_0=timestamp_0,
monitoring_status=GPUMonitoringStatus.SUCCESS,
)
self.logger.debug(f"GPU monitoring completed: {len(self.gpu_utilization)} samples collected")
else:
# No samples were collected: return empty series so the dataclass can still be constructed
metrics = GPURawMetrics(
utilization=[],
memory_used=[],
timestamps=[],
timestamp_0=0.0,
monitoring_status=GPUMonitoringStatus.NO_SAMPLES_COLLECTED,
)
return metrics
def _monitor_loop(self):
"""Background monitoring loop using threading.Event for communication."""
while not self.stop_event.is_set():
utilization, memory_used = self.gpu_stats_getter.get_utilization_and_memory_used()
self.gpu_utilization.append(utilization)
self.gpu_memory_used.append(memory_used)
self.timestamps.append(time.time())
if self.stop_event.wait(timeout=self.sample_interval_sec):
break
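A minimal usage sketch of `GPUMonitor`, assuming a CUDA machine with `gpustat` available; the sleep is only a placeholder for a real workload:

```py
# Sketch: sample GPU utilization around a workload, then inspect the collected metrics.
monitor = GPUMonitor(sample_interval_sec=0.1)
monitor.start()
time.sleep(1.0)  # placeholder for the real workload, e.g. model.generate(...)
metrics = monitor.stop_and_collect()
print(metrics.monitoring_status, len(metrics.utilization))
```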

View File

@ -0,0 +1,7 @@
numpy>=1.21.0
psutil>=5.8.0
gpustat>=1.0.0
torch>=2.0.0
transformers>=4.30.0
datasets>=2.10.0
huggingface_hub>=0.16.0

benchmark_v2/run_benchmarks.py · 116 lines · Executable file
View File

@ -0,0 +1,116 @@
#!/usr/bin/env python3
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Top-level benchmarking script that automatically discovers and runs all benchmarks
in the ./benches directory, organizing outputs into model-specific subfolders.
"""
import argparse
import logging
import sys
import uuid
from framework.benchmark_config import BenchmarkConfig, generate_all_configs, generate_main_configs
from framework.benchmark_runner import BenchmarkRunner
if __name__ == "__main__":
# Parse arguments
parser = argparse.ArgumentParser()
parser.add_argument("--output-dir", type=str, default=None, help="Output dir for benchmark results")
parser.add_argument("--log-level", type=str, choices=["DEBUG", "INFO", "WARNING", "ERROR"], default="INFO")
parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")
parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations")
parser.add_argument("--iterations", type=int, default=10, help="Number of measurement iterations")
parser.add_argument("--batch-size", "-b", type=int, nargs="+", help="Batch size")
parser.add_argument("--sequence-length", "-s", type=int, nargs="+", help="Sequence length")
parser.add_argument("--num-tokens-to-generate", "-n", type=int, nargs="+", help="Number of tokens to generate")
parser.add_argument("--cross-generate", action="store_true", help="Cross-generate all combinations of configs")
parser.add_argument("--num-tokens-to-profile", "-p", type=int, default=0, help="Number of tokens to profile")
parser.add_argument("--commit-id", type=str, help="Git commit ID (if not provided, will auto-detect from git)")
args = parser.parse_args()
# Setup logging
benchmark_run_uuid = str(uuid.uuid4())[:8]
numeric_level = getattr(logging, args.log_level.upper())
handlers = [logging.StreamHandler(sys.stdout)]
logging.basicConfig(
level=numeric_level, format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s", handlers=handlers
)
logger = logging.getLogger("benchmark_v2")
logger.info("Starting benchmark discovery and execution")
logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
logger.info(f"Output directory: {args.output_dir}")
# Error out if one of the arguments is not provided
if len(args.batch_size) * len(args.sequence_length) * len(args.num_tokens_to_generate) == 0:
raise ValueError(
"At least one of the arguments --batch-size, --sequence-length, or --num-tokens-to-generate is required"
)
# If there is only one (batch_size, sequence_length, num_tokens_to_generate), we benchmark across configs
elif len(args.batch_size) * len(args.sequence_length) * len(args.num_tokens_to_generate) == 1:
if args.cross_generate:
benchmark_configs = generate_all_configs(
warmup_iterations=args.warmup,
measurement_iterations=args.iterations,
batch_size=args.batch_size[0],
sequence_length=args.sequence_length[0],
num_tokens_to_generate=args.num_tokens_to_generate[0],
)
else:
benchmark_configs = generate_main_configs(
warmup_iterations=args.warmup,
measurement_iterations=args.iterations,
batch_size=args.batch_size[0],
sequence_length=args.sequence_length[0],
num_tokens_to_generate=args.num_tokens_to_generate[0],
)
# Otherwise, we benchmark across all combinations of dimensions
else:
main_config = generate_main_configs(
warmup_iterations=args.warmup,
measurement_iterations=args.iterations,
batch_size=args.batch_size[0],
sequence_length=args.sequence_length[0],
num_tokens_to_generate=args.num_tokens_to_generate[0],
)[0]
benchmark_configs = []
for num_tokens_to_generate in args.num_tokens_to_generate:
for sequence_length in args.sequence_length:
for batch_size in args.batch_size:
cfg_dict = main_config.to_dict()
cfg_dict["batch_size"] = batch_size
cfg_dict["sequence_length"] = sequence_length
cfg_dict["num_tokens_to_generate"] = num_tokens_to_generate
cfg_dict.pop("name")
benchmark_configs.append(BenchmarkConfig.from_dict(cfg_dict))
runner = BenchmarkRunner(logger, args.output_dir, args.commit_id)
results = runner.run_benchmarks(
args.model_id,
benchmark_configs,
args.num_tokens_to_profile,
pretty_print_summary=True,
)
# runner.save_results(args.model_id, results)
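For reference, the same entry points can also be driven programmatically. A hedged sketch, where the model id, output directory, and run dimensions are placeholders and the call signatures follow the script above:

```py
import logging

from framework.benchmark_config import generate_main_configs
from framework.benchmark_runner import BenchmarkRunner

# Placeholder dimensions; adjust to the scenario you want to measure.
configs = generate_main_configs(
    warmup_iterations=3,
    measurement_iterations=10,
    batch_size=1,
    sequence_length=128,
    num_tokens_to_generate=64,
)
# Positional arguments mirror the script: logger, output_dir, commit_id.
runner = BenchmarkRunner(logging.getLogger("benchmark_v2"), None, None)
results = runner.run_benchmarks("meta-llama/Llama-3.2-1B", configs, pretty_print_summary=True)
```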

View File

@ -16,6 +16,7 @@
# by pytest before any tests are run
import doctest
import os
import sys
import warnings
from os.path import abspath, dirname, join
@ -27,6 +28,7 @@ from transformers.testing_utils import (
HfDoctestModule,
HfDocTestParser,
is_torch_available,
patch_testing_methods_to_collect_info,
patch_torch_compile_force_graph,
)
@ -52,7 +54,6 @@ NOT_DEVICE_TESTS = {
"test_gradient_checkpointing_backward_compatibility",
"test_gradient_checkpointing_enable_disable",
"test_torch_save_load",
"test_initialization",
"test_forward_signature",
"test_model_get_set_embeddings",
"test_model_main_input_name",
@ -62,11 +63,8 @@ NOT_DEVICE_TESTS = {
"test_load_save_without_tied_weights",
"test_tied_weights_keys",
"test_model_weights_reload_no_missing_tied_weights",
"test_mismatched_shapes_have_properly_initialized_weights",
"test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
"test_can_load_ignoring_mismatched_shapes",
"test_model_is_small",
"test_tf_from_pt_safetensors",
"test_flax_from_pt_safetensors",
"ModelTest::test_pipeline_", # None of the pipeline tests from PipelineTesterMixin (of which XxxModelTest inherits from) are running on device
"ModelTester::test_pipeline_",
"/repo_utils/",
@ -91,6 +89,8 @@ def pytest_configure(config):
config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality")
config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality")
os.environ["DISABLE_SAFETENSORS_CONVERSION"] = "true"
def pytest_collection_modifyitems(items):
for item in items:
@ -145,3 +145,7 @@ if is_torch_available():
# patch `torch.compile`: if `TORCH_COMPILE_FORCE_FULLGRAPH=1` (or values considered as true, e.g. yes, y, etc.),
# the patched version will always run with `fullgraph=True`.
patch_torch_compile_force_graph()
if os.environ.get("PATCH_TESTING_METHODS_TO_COLLECT_OUTPUTS", "").lower() in ("yes", "true", "on", "y", "1"):
patch_testing_methods_to_collect_info()

View File

@ -1,15 +1,13 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
ARG REF=main
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
RUN uv pip install --no-cache-dir --upgrade 'torch<2.9' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[quality,testing,torch-speech,vision]"
RUN git lfs install
RUN uv pip uninstall transformers

View File

@ -1,8 +1,8 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler git-lfs curl
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
@ -15,12 +15,20 @@ RUN mv catch.hpp ../libs/
RUN cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local
RUN make install -j 10
WORKDIR /
RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache --upgrade 'torch<2.9' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,spacy,ftfy,rjieba]" unidic unidic-lite
# spacy is not used so it is not tested; it causes failures. TODO: fix later
RUN uv run python -m unidic download
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -1,13 +0,0 @@
FROM python:3.9-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git
RUN apt-get install -y g++ cmake
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv
RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval
RUN uv pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
RUN uv pip install --no-cache-dir "protobuf==3.20.3"
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -1,12 +1,19 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ffmpeg
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -1,17 +1,24 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1 g++ tesseract-ocr git-lfs curl
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir --no-deps timm accelerate
RUN uv pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
RUN uv pip install -U --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
# RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose' 'dataset'
# RUN git clone https://github.com/facebookresearch/detectron2.git
# RUN python3 -m pip install --no-cache-dir -e detectron2
RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -1,10 +0,0 @@
FROM python:3.9-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]"
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -1,10 +0,0 @@
FROM python:3.9-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]"
RUN uv pip install --no-cache-dir "protobuf==3.20.3" tensorflow_probability
RUN apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@ -1,11 +1,18 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg curl
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers

View File

@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root

View File

@ -1,12 +0,0 @@
FROM python:3.9-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ pkg-config openssh-client git
RUN apt-get install -y cmake
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
RUN uv pip install --no-cache-dir "protobuf==3.20.3"
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -1,16 +0,0 @@
FROM python:3.9-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-deps accelerate
RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]"
# RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax,testing,sentencepiece,flax-speech,vision]"
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -1,11 +1,17 @@
FROM python:3.9-slim
FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ffmpeg
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git-lfs ffmpeg curl
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch<2.9' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
# fetch test data and hub objects within CircleCI docker images to reduce even more connections
# we don't need a full clone of `transformers` to run `fetch_hub_objects_for_ci.py`
# the data are downloaded to the directory `/test_data` and during CircleCI's CI runtime, we need to move them to the root of `transformers`
RUN mkdir test_data && cd test_data && curl -O https://raw.githubusercontent.com/huggingface/transformers/${REF}/utils/fetch_hub_objects_for_ci.py && python3 fetch_hub_objects_for_ci.py
RUN uv pip uninstall transformers

View File

@ -1,19 +0,0 @@
FROM python:3.9-slim
ENV PYTHONDONTWRITEBYTECODE=1
ARG REF=main
RUN echo ${REF}
USER root
RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
RUN uv pip install --no-cache-dir --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
RUN git lfs install
RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]"
RUN uv pip install --no-cache-dir "protobuf==3.20.3" librosa
RUN uv pip uninstall transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

View File

@ -12,8 +12,6 @@ SHELL ["sh", "-lc"]
ARG PYTORCH='2.8.0'
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu126'
# Disable kernel mapping for now until all tests pass
ENV DISABLE_KERNEL_MAPPING=1
RUN apt update
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
@ -26,13 +24,14 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers &&
# 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
# 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
# Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA && python3 -m pip uninstall -y tensorflow tensorflow_text tensorflow_probability
RUN python3 -m pip uninstall -y flax jax
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
RUN python3 -m pip install --no-cache-dir -U timm
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git || echo "Don't install detectron2 with nightly torch"
RUN python3 -m pip install --no-cache-dir pytesseract
RUN python3 -m pip install -U "itsdangerous<2.1.0"
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
@ -41,6 +40,8 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
# For bettertransformer
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
# For kernels
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels
# For video model testing
RUN python3 -m pip install --no-cache-dir av
@ -52,7 +53,7 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
RUN python3 -m pip install --no-cache-dir quanto
# After using A10 as CI runner, let's run FA2 tests
RUN python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation || echo "Don't install FA2 with nightly torch"
# TODO (ydshieh): check this again
# `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests

View File

@ -15,8 +15,8 @@ RUN apt update && \
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
python3 -m pip install --no-cache-dir \
jupyter \
tensorflow \
torch
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/kernels@main#egg=kernels
RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \

View File

@ -0,0 +1,71 @@
FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu24.04 AS base
LABEL maintainer="Hugging Face"
SHELL ["/bin/bash", "-c"]
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get update
RUN apt-get update && \
apt-get -y install \
apt-utils \
build-essential \
ca-certificates \
clinfo \
curl \
git \
git-lfs \
vim \
numactl \
gnupg2 \
gpg-agent \
python3-dev \
python3-opencv \
unzip \
ffmpeg \
tesseract-ocr \
espeak-ng \
wget \
ncurses-term \
google-perftools \
libjemalloc-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Use a virtual env because Ubuntu 24.04 does not allow pip installs into the system Python
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip install --upgrade pip wheel
RUN pip install torch torchvision torchaudio torchcodec --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip install av pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sentence_transformers sacremoses nltk rouge_score librosa soundfile mpi4py pytorch_msssim
RUN pip install onnx optimum onnxruntime
RUN pip install autoawq
RUN pip install gptqmodel --no-build-isolation
RUN pip install -U datasets timm transformers accelerate peft diffusers opencv-python kenlm evaluate
RUN pip install -U intel-openmp
# install bitsandbytes
RUN git clone https://github.com/bitsandbytes-foundation/bitsandbytes.git && cd bitsandbytes/ && \
cmake -DCOMPUTE_BACKEND=cpu -S . && make && pip install . && cd ../
# CPU doesn't need triton
RUN pip uninstall triton -y
ENV LD_PRELOAD=${LD_PRELOAD}:/opt/venv/lib/libiomp5.so:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
ENV KMP_AFFINITY=granularity=fine,compact,1,0
RUN touch /entrypoint.sh
RUN chmod +x /entrypoint.sh
RUN echo "#!/bin/bash" >> /entrypoint.sh
RUN echo "/bin/bash" >> /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -1,59 +0,0 @@
ARG BASE_DOCKER_IMAGE
FROM $BASE_DOCKER_IMAGE
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
# Use login shell to read variables from `~/.profile` (to pass dynamic created variables between RUN commands)
SHELL ["sh", "-lc"]
RUN apt update
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs libaio-dev
RUN git lfs install
RUN python3 -m pip install --no-cache-dir --upgrade pip
ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime]
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop
ARG FRAMEWORK
ARG VERSION
# Control `setuptools` version to avoid some issues
RUN [ "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5"
# Remove all frameworks
RUN python3 -m pip uninstall -y torch torchvision torchaudio tensorflow jax flax
# Get the libraries and their versions to install, and write installation command to `~/.profile`.
RUN python3 ./transformers/utils/past_ci_versions.py --framework $FRAMEWORK --version $VERSION
# Install the target framework
RUN echo "INSTALL_CMD = $INSTALL_CMD"
RUN $INSTALL_CMD
RUN [ "$FRAMEWORK" != "pytorch" ] && echo "`deepspeed-testing` installation is skipped" || python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]
# Remove `accelerate`: it requires `torch`, and this causes import issues for TF-only testing
# We will install `accelerate@main` in Past CI workflow file
RUN python3 -m pip uninstall -y accelerate
# Uninstall `torch-tensorrt` and `apex` shipped with the base image
RUN python3 -m pip uninstall -y torch-tensorrt apex
# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
RUN python3 -m pip uninstall -y deepspeed
# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
RUN python3 -m pip install -U "itsdangerous<2.1.0"
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

View File

@ -20,14 +20,9 @@ WORKDIR /
ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
# On ROCm, torchcodec is required to decode audio files
# RUN python3 -m pip install --no-cache-dir torchcodec
# Install transformers
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video,audio]
# Remove tensorflow and flax as they are no longer supported by transformers
RUN python3 -m pip uninstall -y tensorflow flax
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop
@ -37,3 +32,13 @@ RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
# `kernels` may causes many failing tests
RUN python3 -m pip uninstall -y kernels
# On ROCm, torchcodec is required to decode audio files, and versions 0.4 and 0.6 fail
RUN python3 -m pip install --no-cache-dir "torchcodec==0.5"
# Install flash attention from source. Tested with commit 6387433156558135a998d5568a9d74c1778666d8
RUN git clone https://github.com/ROCm/flash-attention/ -b tridao && \
cd flash-attention && \
GPU_ARCHS="gfx942" python setup.py install
RUN python3 -m pip install --no-cache-dir einops

View File

@ -25,8 +25,6 @@ RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch';
RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' || VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' || VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
RUN python3 -m pip uninstall -y tensorflow flax
RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
RUN python3 -m pip install -U "itsdangerous<2.1.0"

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
@ -9,11 +9,9 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).
ARG PYTORCH='2.6.0'
ARG PYTORCH='2.8.0'
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu121'
# Disable kernel mapping for quantization tests
ENV DISABLE_KERNEL_MAPPING=1
ARG CUDA='cu126'
RUN apt update
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
@ -30,31 +28,20 @@ RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio tor
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
# needed in bnb and awq
RUN python3 -m pip install --no-cache-dir einops
# Add bitsandbytes for mixed int8 testing
RUN python3 -m pip install --no-cache-dir bitsandbytes
# Add gptqmodel for gtpq quantization testing, installed from source for pytorch==2.6.0 compatibility
RUN python3 -m pip install lm_eval
RUN git clone https://github.com/ModelCloud/GPTQModel.git && cd GPTQModel && pip install -v . --no-build-isolation
# Add optimum for gptq quantization testing
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
# Add PEFT
RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft
# Add aqlm for quantization testing
RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
# needed in bnb and awq
RUN python3 -m pip install --no-cache-dir einops
# Add vptq for quantization testing
RUN pip install vptq
# Add bitsandbytes
RUN python3 -m pip install --no-cache-dir bitsandbytes
# Add spqr for quantization testing
# Commented for now as No matching distribution found we need to reach out to the authors
# RUN python3 -m pip install --no-cache-dir spqr_quant[gpu]
# # Add gptqmodel
# RUN python3 -m pip install --no-cache-dir gptqmodel
# Add hqq for quantization testing
RUN python3 -m pip install --no-cache-dir hqq
@ -63,25 +50,11 @@ RUN python3 -m pip install --no-cache-dir hqq
RUN python3 -m pip install --no-cache-dir gguf
# Add autoawq for quantization testing
# New release v0.2.8
RUN python3 -m pip install --no-cache-dir autoawq[kernels]
# Add quanto for quantization testing
RUN python3 -m pip install --no-cache-dir optimum-quanto
# Add eetq for quantization testing
RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install .
# # Add flute-kernel and fast_hadamard_transform for quantization testing
# # Commented for now as they cause issues with the build
# # TODO: create a new workflow to test them
# RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1
# RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git
# Add fp-quant for quantization testing
# Requires py3.11 but our CI runs on 3.9
# RUN python3 -m pip install --no-cache-dir "fp-quant>=0.1.6"
# Add compressed-tensors for quantization testing
RUN python3 -m pip install --no-cache-dir compressed-tensors
@ -89,7 +62,10 @@ RUN python3 -m pip install --no-cache-dir compressed-tensors
RUN python3 -m pip install --no-cache-dir amd-quark
# Add AutoRound for quantization testing
RUN python3 -m pip install --no-cache-dir "auto-round>=0.5.0"
RUN python3 -m pip install --no-cache-dir auto-round
# Add torchao for quantization testing
RUN python3 -m pip install --no-cache-dir torchao
# Add transformers in editable mode
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
@ -103,3 +79,27 @@ RUN python3 -m pip uninstall -y flash-attn
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop
# Add fp-quant for quantization testing
RUN python3 -m pip install --no-cache-dir "fp-quant>=0.2.0"
# Low usage or incompatible lib, will enable later on
# # Add aqlm for quantization testing
# RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2
# # Add vptq for quantization testing
# RUN pip install vptq
# Add spqr for quantization testing
# Commented for now as No matching distribution found we need to reach out to the authors
# RUN python3 -m pip install --no-cache-dir spqr_quant[gpu]
# # Add eetq for quantization testing
# RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install .
# # Add flute-kernel and fast_hadamard_transform for quantization testing
# # Commented for now as they cause issues with the build
# # TODO: create a new workflow to test them
# RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1
# RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git

View File

@ -1,25 +0,0 @@
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
LABEL maintainer="Hugging Face"
ARG DEBIAN_FRONTEND=noninteractive
RUN apt update
RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
RUN python3 -m pip install --no-cache-dir --upgrade pip
ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-tensorflow,testing]
# If set to nothing, will install the latest version
ARG TENSORFLOW='2.13'
RUN [ ${#TENSORFLOW} -gt 0 ] && VERSION='tensorflow=='$TENSORFLOW'.*' || VERSION='tensorflow'; python3 -m pip install --no-cache-dir -U $VERSION
RUN python3 -m pip uninstall -y torch flax
RUN python3 -m pip install -U "itsdangerous<2.1.0"
RUN python3 -m pip install --no-cache-dir -U "tensorflow_probability<0.22"
# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

View File

@ -50,7 +50,7 @@ Begin translating the text!
1. Start with the `_toctree.yml` file that corresponds to your documentation chapter. This file is essential for rendering the table of contents on the website.
- If the `_toctree.yml` file doesnt exist for your language, create one by copying the English version and removing unrelated sections.
- If the `_toctree.yml` file doesn't exist for your language, create one by copying the English version and removing unrelated sections.
- Ensure it is placed in the `docs/source/LANG-ID/` directory.
Here's an example structure for the `_toctree.yml` file:

View File

@ -123,8 +123,6 @@
title: Run training on Amazon SageMaker
- local: serialization
title: Export to ONNX
- local: tflite
title: Export to TFLite
- local: torchscript
title: Export to TorchScript
- local: notebooks
@ -184,8 +182,6 @@
# title: Efficient training on CPU
# - local: perf_train_cpu_many
# title: Distributed CPU training
# - local: perf_train_tpu_tf
# title: Training on TPU with TensorFlow
# - local: perf_train_special
# title: PyTorch training on Apple silicon
# - local: perf_hardware
@ -203,8 +199,6 @@
# title: Instantiating a big model
# - local: debugging
# title: Debugging
# - local: tf_xla
# title: XLA integration for TensorFlow models
# - local: perf_torch_compile
# title: Optimizing inference with `torch.compile()`
# title: Performance and scalability
@ -260,8 +254,6 @@
# title: Configuration
# - local: main_classes/data_collator
# title: Data collator
# - local: main_classes/keras_callbacks
# title: Keras callbacks
# - local: main_classes/logging
# title: Logging
# - local: main_classes/model

View File

@ -52,7 +52,7 @@
<figcaption class="mt-2 text-center text-sm text-gray-500">The image shows a diagram of the stages of the Swin model.</figcaption>
</div>
[`AutoBackbone`] lets you use pretrained models as a backbone to get feature maps from different stages of the backbone. You must specify one of the following parameters in [`~PretrainedConfig.from_pretrained`]:
[`AutoBackbone`] lets you use pretrained models as a backbone to get feature maps from different stages of the backbone. You must specify one of the following parameters in [`~PreTrainedConfig.from_pretrained`]:
* `out_indices` is the index of the layer you want to get the feature map from
* `out_features` is the name of the layer you want to get the feature map from (see the sketch below)
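For example, a minimal sketch (the checkpoint name here is only illustrative):

```py
>>> from transformers import AutoBackbone

>>> # Load a Swin backbone and request the feature map from stage 1
>>> backbone = AutoBackbone.from_pretrained("microsoft/swin-tiny-patch4-window7-224", out_indices=(1,))
```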
@ -115,8 +115,6 @@
## AutoModel
<frameworkcontent>
<pt>
The `AutoModelFor` classes let you load a pretrained model for a given task (see [here](model_doc/auto) for a complete list of available tasks). For example, load a model for sequence classification with [`AutoModelForSequenceClassification.from_pretrained`]:
```py
@ -143,25 +141,4 @@
بشكل عام، نوصي باستخدام فئة `AutoTokenizer` وفئة `AutoModelFor` لتحميل مثيلات مُدربة مسبقًا من النماذج. سيساعدك هذا في تحميل البنية الصحيحة في كل مرة. في البرنامج التعليمي التالي، تعرف على كيفية استخدام المحلل اللغوي ومعالج الصور ومستخرج الميزات والمعالج الذي تم تحميله حديثًا لمعالجة مجموعة بيانات للضبط الدقيق.
</pt>
<tf>
أخيرًا، تسمح لك فئات `TFAutoModelFor` بتحميل نموذج مُدرب مسبقًا لمهمة معينة (راجع [هنا](model_doc/auto) للحصول على قائمة كاملة بالمهام المتاحة). على سبيل المثال، قم بتحميل نموذج لتصنيف التسلسل باستخدام [`TFAutoModelForSequenceClassification.from_pretrained`]:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
```
أعد استخدام نفس نقطة التفتيش لتحميل بنية لمهمة مختلفة:
```py
>>> from transformers import TFAutoModelForTokenClassification
>>> model = TFAutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased")
```
بشكل عام، نوصي باستخدام فئة `AutoTokenizer` وفئة `TFAutoModelFor` لتحميل نسخ لنماذج مُدربة مسبقًا. سيساعدك هذا في تحميل البنية الصحيحة في كل مرة. في البرنامج التعليمي التالي، ستتعرف على كيفية استخدام المُجزّئ اللغوي ومعالج الصور ومستخرج الميزات والمعالج الذي تم تحميله حديثًا لمعالجة مجموعة بيانات للضبط الدقيق.
</tf>
</frameworkcontent>


@ -54,19 +54,19 @@ DistilBertConfig {
```
يمكن تعديل خصائص النموذج المدرب مسبقًا في دالة [`~PretrainedConfig.from_pretrained`] :
يمكن تعديل خصائص النموذج المدرب مسبقًا في دالة [`~PreTrainedConfig.from_pretrained`] :
```py
>>> my_config = DistilBertConfig.from_pretrained("distilbert/distilbert-base-uncased", activation="relu", attention_dropout=0.4)
```
بمجرد أن تصبح راضيًا عن تكوين نموذجك، يمكنك حفظه باستخدام [`~PretrainedConfig.save_pretrained`]. يتم تخزين ملف التكوين الخاص بك على أنه ملف JSON في دليل الحفظ المحدد:
بمجرد أن تصبح راضيًا عن تكوين نموذجك، يمكنك حفظه باستخدام [`~PreTrainedConfig.save_pretrained`]. يتم تخزين ملف التكوين الخاص بك على أنه ملف JSON في دليل الحفظ المحدد:
```py
>>> my_config.save_pretrained(save_directory="./your_model_save_path")
```
لإعادة استخدام ملف التكوين، قم بتحميله باستخدام [`~PretrainedConfig.from_pretrained`]:
لإعادة استخدام ملف التكوين، قم بتحميله باستخدام [`~PreTrainedConfig.from_pretrained`]:
```py
>>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json")
@ -81,8 +81,6 @@ DistilBertConfig {
الخطوة التالية هي إنشاء [نموذج](main_classes/models). النموذج - ويُشار إليه أحيانًا باسم البنية - يُحدد وظيفة كل طبقة والعمليات الحسابية المُنفذة. تُستخدم خصائص مثل `num_hidden_layers` من التكوين لتحديد هذه البنية. تشترك جميع النماذج في فئة أساسية واحدة هي [`PreTrainedModel`] وبعض الوظائف المُشتركة مثل تغيير حجم مُدخلات الكلمات وتقليص رؤوس آلية الانتباه الذاتي. بالإضافة إلى ذلك، فإن جميع النماذج هي فئات فرعية إما من [`torch.nn.Module`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html) أو [`tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) أو [`flax.linen.Module`](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html). هذا يعني أن النماذج متوافقة مع كل استخدام لإطار العمل الخاص بها.
<frameworkcontent>
<pt>
قم بتحميل خصائص التكوين المخصصة الخاصة بك في النموذج:
```py
@ -105,39 +103,11 @@ DistilBertConfig {
```py
>>> model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased"، config=my_config)
```
</pt>
<tf>
قم بتحميل خصائص التكوين المُخصصة الخاصة بك في النموذج:
```py
>>> from transformers import TFDistilBertModel
>>> my_config = DistilBertConfig.from_pretrained("./your_model_save_path/my_config.json")
>>> tf_model = TFDistilBertModel(my_config)
```
هذا ينشئ نموذجًا بقيم عشوائية بدلاً من الأوزان المُدربة مسبقًا. لن يكون هذا النموذج مفيدًا حتى يتم تدريبه. تُعد عملية التدريب مكلفة وتستغرق وقتًا طويلاً. من الأفضل بشكل عام استخدام نموذج مُدرب مسبقًا للحصول على نتائج أفضل بشكل أسرع، مع استخدام جزء بسيط فقط من الموارد المطلوبة للتدريب.
قم بإنشاء نموذج مُدرب مسبقًا باستخدام [`~TFPreTrainedModel.from_pretrained`]:
```py
>>> tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased")
```
عندما تقوم بتحميل الأوزان المُدربة مسبقًا،يتم تحميل إعدادات النموذج الافتراضي تلقائيًا إذا كان النموذج من مكتبة 🤗 Transformers. ومع ذلك، يمكنك أيضًا استبدال - بعض أو كل - إعدادات النموذج الافتراضية بإعداداتك الخاصة:
```py
>>> tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased"، config=my_config)
```
</tf>
</frameworkcontent>
### رؤوس النموذج
في هذه المرحلة، لديك نموذج DistilBERT الأساسي الذي يُخرج *الحالات الكامنة*. تُمرَّر هذه الحالات الكامنة كمدخلات لرأس النموذج لإنتاج المخرجات النهائية. توفر مكتبة 🤗 Transformers رأس نموذج مختلفًا لكل مهمة طالما أن النموذج يدعم المهمة (أي لا يمكنك استخدام DistilBERT لمهمة تسلسل إلى تسلسل مثل الترجمة).
<frameworkcontent>
<pt>
على سبيل المثال، [`DistilBertForSequenceClassification`] هو نموذج DistilBERT الأساس مزودًا برأس تصنيف تسلسلي. يُشكّل رأس التصنيف التسلسلي طبقة خطية فوق المخرجات المجمعة.
```py
@ -153,25 +123,6 @@ DistilBertConfig {
>>> model = DistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
```
</pt>
<tf>
على سبيل المثال، [`TFDistilBertForSequenceClassification`] هو نموذج DistilBERT الأساسي برأس تصنيف تسلسل. رأس التصنيف التسلسلي هو طبقة خطية أعلى المخرجات المجمعة.
```py
>>> from transformers import TFDistilBertForSequenceClassification
>>> tf_model = TFDistilBertForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
```
أعد استخدام هذا نقطة التحقق لمهمة أخرى عن طريق التبديل إلى رأس نموذج مختلف. لمهمة الإجابة على الأسئلة، ستستخدم رأس النموذج [`TFDistilBertForQuestionAnswering`]. رأس الإجابة على الأسئلة مشابه لرأس التصنيف التسلسلي باستثناء أنه طبقة خطية أعلى حالات الإخراج المخفية.
```py
>>> from transformers import TFDistilBertForQuestionAnswering
>>> tf_model = TFDistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
```
</tf>
</frameworkcontent>
## مجزئ النصوص


@ -20,11 +20,11 @@
في مثالنا، سنعدّل بعض الوسائط في فئة ResNet التي قد نرغب في ضبطها. ستعطينا التكوينات المختلفة أنواع ResNet المختلفة الممكنة. سنقوم بتخزين هذه الوسائط بعد التحقق من صحتها.
```python
from transformers import PretrainedConfig
from transformers import PreTrainedConfig
from typing import List
class ResnetConfig(PretrainedConfig):
class ResnetConfig(PreTrainedConfig):
model_type = "resnet"
def __init__(
@ -58,11 +58,11 @@ class ResnetConfig(PretrainedConfig):
```
الأشياء الثلاثة المهمة التي يجب تذكرها عند كتابة تكوينك الخاص هي:
- يجب أن ترث من `PretrainedConfig`،
- يجب أن تقبل دالة `__init__` الخاصة بـ `PretrainedConfig` أي معاملات إضافية kwargs،
- يجب أن ترث من `PreTrainedConfig`،
- يجب أن تقبل دالة `__init__` الخاصة بـ `PreTrainedConfig` أي معاملات إضافية kwargs،
- يجب تمرير هذه المعاملات الإضافية إلى دالة `__init__` في الفئة الأساسية الأعلى.
يضمن الإرث حصولك على جميع الوظائف من مكتبة 🤗 Transformers، في حين أن القيدين الثاني والثالث يأتيان من حقيقة أن `PretrainedConfig` لديه حقول أكثر من تلك التي تقوم بتعيينها. عند إعادة تحميل تكوين باستخدام طريقة `from_pretrained`، يجب أن يقبل تكوينك هذه الحقول ثم إرسالها إلى الفئة الأساسية الأعلى.
يضمن الإرث حصولك على جميع الوظائف من مكتبة 🤗 Transformers، في حين أن القيدين الثاني والثالث يأتيان من حقيقة أن `PreTrainedConfig` لديه حقول أكثر من تلك التي تقوم بتعيينها. عند إعادة تحميل تكوين باستخدام طريقة `from_pretrained`، يجب أن يقبل تكوينك هذه الحقول ثم إرسالها إلى الفئة الأساسية الأعلى.
تحديد `model_type` لتكوينك (هنا `model_type="resnet"`) ليس إلزاميًا، ما لم ترغب في
تسجيل نموذجك باستخدام الفئات التلقائية (راجع القسم الأخير).
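إليك مخططًا افتراضيًا مختصرًا لهذا النمط (الفئة وحقولها هنا افتراضية لغرض التوضيح فقط، وتُظهر كيفية تمرير المعاملات الإضافية kwargs إلى الفئة الأساسية؛ في الإصدارات الأقدم من المكتبة تُكتب الفئة الأساسية `PretrainedConfig`):
```python
from transformers import PreTrainedConfig


class ToyResnetConfig(PreTrainedConfig):
    model_type = "toy-resnet"  # only needed if you register the model with the auto classes

    def __init__(self, depths=(3, 4, 6, 3), hidden_size=2048, **kwargs):
        # store/validate your own arguments
        self.depths = list(depths)
        self.hidden_size = hidden_size
        # forward everything else so the fields managed by PreTrainedConfig
        # (and the ones reloaded by from_pretrained) keep working
        super().__init__(**kwargs)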
@ -82,7 +82,7 @@ resnet50d_config.save_pretrained("custom-resnet")
resnet50d_config = ResnetConfig.from_pretrained("custom-resnet")
```
يمكنك أيضًا استخدام أي طريقة أخرى من فئة [`PretrainedConfig`]، مثل [`~PretrainedConfig.push_to_hub`] لتحميل تكوينك مباشرة إلى Hub.
يمكنك أيضًا استخدام أي طريقة أخرى من فئة [`PreTrainedConfig`]، مثل [`~PreTrainedConfig.push_to_hub`] لتحميل تكوينك مباشرة إلى Hub.
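على سبيل المثال، بإعادة استخدام التكوين `resnet50d_config` المُنشأ أعلاه (يفترض هذا أنك سجّلت الدخول إلى Hub، واسم المستودع هنا مجرد مثال):
```python
resnet50d_config.push_to_hub("your-username/custom-resnet")
```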
## كتابة نموذج مخصص


@ -60,10 +60,10 @@ pip install transformers bitsandbytes>=0.39.0 -q
أولاً، تحتاج إلى تحميل النموذج.
```py
>>> from transformers import AutoModelForCausalLM
>>> from transformers import AutoModelForCausalLM, BitsAndBytesConfig
>>> model = AutoModelForCausalLM.from_pretrained(
... "mistralai/Mistral-7B-v0.1", device_map="auto", load_in_4bit=True
... "mistralai/Mistral-7B-v0.1", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True)
... )
```
@ -113,12 +113,12 @@ pip install transformers bitsandbytes>=0.39.0 -q
هناك العديد من [استراتيجيات التوليد](generation_strategies)، وفي بعض الأحيان قد لا تكون القيم الافتراضية مناسبة لحالتك الاستخدام. إذا لم تكن الإخراج الخاصة بك متوافقة مع ما تتوقعه، فقد قمنا بإنشاء قائمة بأكثر الأخطاء الشائعة وكيفية تجنبها.
```py
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
>>> tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
>>> tokenizer.pad_token = tokenizer.eos_token # Most LLMs don't have a pad token by default
>>> model = AutoModelForCausalLM.from_pretrained(
... "mistralai/Mistral-7B-v0.1", device_map="auto", load_in_4bit=True
... "mistralai/Mistral-7B-v0.1", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True)
... )
```
@ -192,7 +192,7 @@ LLMs هي [معماريات فك التشفير فقط](https://huggingface.co/l
```python
>>> tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
>>> model = AutoModelForCausalLM.from_pretrained(
... "HuggingFaceH4/zephyr-7b-alpha", device_map="auto", load_in_4bit=True
... "HuggingFaceH4/zephyr-7b-alpha", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True)
... )
>>> set_seed(0)
>>> prompt = """How many helicopters can a human eat in one sitting? Reply as a thug."""


@ -231,7 +231,7 @@ flush()
دعنا نرى ما هو استهلاك ذاكرة GPU الذروة الذي يوفره تكميم 4 بت. يمكن تكميم النموذج إلى 4 بت باستخدام نفس واجهة برمجة التطبيقات كما في السابق - هذه المرة عن طريق تمرير `load_in_4bit=True` بدلاً من `load_in_8bit=True` إلى `BitsAndBytesConfig`.
```python
model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", load_in_4bit=True, pad_token_id=0)
model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", quantization_config=BitsAndBytesConfig(load_in_4bit=True), pad_token_id=0)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
@ -329,174 +329,6 @@ $$ \textbf{O}_i \leftarrow s^a_{ij} * \textbf{O}_i + s^b_{ij} * \mathbf{V}_{j} \
لنلقِ نظرة على مثال عملي.
يحصل نموذج OctoCoder الخاص بنا الآن على موجه إدخال أطول بشكل كبير يتضمن ما يسمى *موجه النظام*. تُستخدم موجهات النظام لتوجيه LLM إلى مساعد أفضل مصمم لمهام المستخدمين.
فيما يلي، نستخدم موجه النظام الذي سيجعل OctoCoder مساعد ترميز أفضل.
```python
system_prompt = """Below are a series of dialogues between various people and an AI technical assistant.
The assistant tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble but knowledgeable.
The assistant is happy to help with code questions and will do their best to understand exactly what is needed.
It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer.
That said, the assistant is practical really does its best, and doesn't let caution get too much in the way of being useful.
The Starcoder models are a series of 15.5B parameter models trained on 80+ programming languages from The Stack (v1.2) (excluding opt-out requests).
The model uses Multi Query Attention, was trained using the Fill-in-the-Middle objective, and with 8,192 tokens context window for a trillion tokens of heavily deduplicated data.
-----
Question: Write a function that takes two lists and returns a list that has alternating elements from each input list.
Answer: Sure. Here is a function that does that.
def alternating(list1, list2):
results = []
for i in range(len(list1)):
results.append(list1[i])
results.append(list2[i])
return results
Question: Can you write some test cases for this function?
Answer: Sure, here are some tests.
assert alternating([10, 20, 30], [1, 2, 3]) == [10, 1, 20, 2, 30, 3]
assert alternating([True, False], [4, 5]) == [True, 4, False, 5]
assert alternating([], []) == []
Question: Modify the function so that it returns all input elements when the lists have uneven length. The elements from the longer list should be at the end.
Answer: Here is the modified function.
def alternating(list1, list2):
results = []
for i in range(min(len(list1), len(list2))):
results.append(list1[i])
results.append(list2[i])
if len(list1) > len(list2):
results.extend(list1[i+1:])
else:
results.extend(list2[i+1:])
return results
-----
"""
```
لأغراض التوضيح، سنكرر موجه النظام عشر مرات بحيث يكون طول الإدخال طويلاً بما يكفي لملاحظة وفورات ذاكرة Flash Attention.
نضيف إليه موجه النص الأصلي: "سؤال: يرجى كتابة وظيفة في Python تقوم بتحويل البايتات إلى جيجا بايت."
```python
long_prompt = 10 * system_prompt + prompt
```
نقوم بتنفيذ نموذجنا مرة أخرى بدقة bfloat16.
```python
model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("bigcode/octocoder")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```
دعنا الآن نقوم بتشغيل النموذج تمامًا مثلما كان من قبل *بدون اهتمام فلاشي* وقياس متطلبات ذاكرة GPU وقت الذروة ووقت الاستدلال.
```python
import time
start_time = time.time()
result = pipe(long_prompt, max_new_tokens=60)[0]["generated_text"][len(long_prompt):]
print(f"Generated in {time.time() - start_time} seconds.")
result
```
**الإخراج**:
```
تم التوليد في 10.96854019165039 ثانية.
بالتأكيد. إليك وظيفة للقيام بذلك.
def bytes_to_giga(bytes):
return bytes / 1024 / 1024 / 1024
الإجابة: بالتأكيد. إليك وظيفة للقيام بذلك.
ديف
```
نحصل على نفس الإخراج كما كان من قبل، ولكن هذه المرة، يقوم النموذج بتكرار الإجابة عدة مرات حتى يتم قطعها عند 60 رمزًا. ليس من المستغرب أننا كررنا موجه النظام عشر مرات لأغراض التوضيح وبالتالي قمنا بتشغيل النموذج لتكرار نفسه.
**ملاحظة** لا ينبغي تكرار موجه النظام عشر مرات في التطبيقات الواقعية - مرة واحدة كافية!
دعنا نقيس متطلبات ذاكرة GPU وقت الذروة.
```python
bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
```
**الإخراج**:
```
37.668193340301514
```
كما نرى، فإن متطلبات ذاكرة GPU وقت الذروة أعلى بكثير مما كانت عليه في البداية، وهو ما يرجع إلى حد كبير إلى تسلسل الإدخال الأطول. أيضًا، يستغرق التوليد أكثر من دقيقة بقليل الآن.
نستدعي `flush()` لتحرير ذاكرة GPU لتجربتنا التالية.
```python
flush()
```
للمقارنة، دعونا نشغّل نفس الدالة، ولكن مع تمكين الاهتمام الفلاشي (Flash Attention) بدلاً من ذلك.
للقيام بذلك، نقوم بتحويل النموذج إلى [BetterTransformer](https://huggingface.co/docs/optimum/bettertransformer/overview)، ومن خلال ذلك نُمكّن [SDPA self-attention](https://pytorch.org/docs/master/generated/torch.nn.functional.scaled_dot_product_attention) في PyTorch، والتي بدورها قادرة على استخدام الاهتمام الفلاشي.
```python
model.to_bettertransformer()
```
الآن نقوم بتشغيل نفس مقتطف التعليمات البرمجية بالضبط كما كان من قبل وتحت الغطاء سوف تستخدم المحولات الاهتمام فلاش.
```py
start_time = time.time()
with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False):
result = pipe(long_prompt, max_new_tokens=60)[0]["generated_text"][len(long_prompt):]
print(f"Generated in {time.time() - start_time} seconds.")
result
```
**الإخراج**:
```
تم التوليد في 3.0211617946624756 ثانية.
بالتأكيد. إليك وظيفة للقيام بذلك.
def bytes_to_giga(bytes):
return bytes / 1024 / 1024 / 1024
الإجابة: بالتأكيد. إليك وظيفة للقيام بذلك.
ديف
```
نحصل على نفس النتيجة بالضبط كما كان من قبل، ولكن يمكننا ملاحظة تسريع كبير بفضل الاهتمام فلاش.
دعنا نقيس استهلاك الذاكرة لآخر مرة.
```python
bytes_to_giga_bytes(torch.cuda.max_memory_allocated())
```
**الإخراج**:
```
32.617331981658936
```
وبذلك نعود تقريبًا إلى ذروة استهلاك ذاكرة GPU الأصلية البالغة 29GB.
يمكننا أن نلاحظ أننا نستخدم فقط حوالي 100 ميجابايت إضافية من ذاكرة GPU عند تمرير تسلسل إدخال طويل جدًا مع الاهتمام فلاش مقارنة بتمرير تسلسل إدخال قصير كما فعلنا في البداية.
```py
flush()
```
لمزيد من المعلومات حول كيفية استخدام Flash Attention، يرجى الاطلاع على [صفحة التوثيق هذه](https://huggingface.co/docs/transformers/en/perf_infer_gpu_one#flashattention-2).
## 3. الابتكارات المعمارية
حتى الآن، نظرنا في تحسين الكفاءة الحسابية والذاكرة من خلال:
@ -640,7 +472,7 @@ for _ in range(5):
next_token_id = torch.argmax(next_logits, dim=-1)
print("shape of input_ids", next_token_id.shape)
print("length of key-value cache", len(past_key_values[0][0])) # past_key_values are of shape [num_layers, 0 for k, 1 for v, batch_size, length, hidden_dim]
print("length of key-value cache", past_key_values.get_seq_length()) # past_key_values are of shape [num_layers, 0 for k, 1 for v, batch_size, length, hidden_dim]
generated_tokens.append(next_token_id.item())
generated_text = tokenizer.batch_decode(generated_tokens)


@ -65,43 +65,15 @@ pip install huggingface_hub
تحويل نقطة التحقق لإطار عمل آخر أمر سهل. تأكد من تثبيت PyTorch و TensorFlow (راجع [هنا](installation) لتعليمات التثبيت)، ثم ابحث عن النموذج الملائم لمهمتك في الإطار الآخر.
<frameworkcontent>
<pt>
حدد `from_tf=True` لتحويل نقطة تحقق من TensorFlow إلى PyTorch:
```py
>>> pt_model = DistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_tf=True)
>>> pt_model.save_pretrained("path/to/awesome-name-you-picked")
```
</pt>
<tf>
حدد `from_pt=True` لتحويل نقطة تحقق من PyTorch إلى TensorFlow:
```py
>>> tf_model = TFDistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_pt=True)
```
بعد ذلك، يمكنك حفظ نموذج TensorFlow الجديد بنقطة التحقق الجديدة:
```py
>>> tf_model.save_pretrained("path/to/awesome-name-you-picked")
```
</tf>
<jax>
إذا كان النموذج متاحًا في Flax، فيمكنك أيضًا تحويل نقطة تحقق من PyTorch إلى Flax:
```py
>>> flax_model = FlaxDistilBertForSequenceClassification.from_pretrained(
... "path/to/awesome-name-you-picked", from_pt=True
... )
```
</jax>
</frameworkcontent>
## دفع نموذج أثناء التدريب
<frameworkcontent>
<pt>
<Youtube id="Z1-XMy-GNLQ"/>
مشاركة نموذجك على Hub أمر بسيط للغاية؛ كل ما عليك هو إضافة معلمة أو دالة استدعاء (callback) إضافية. كما تذكر من درس [التدريب الدقيق](training)، فإن فئة [`TrainingArguments`] هي المكان الذي تحدد فيه المعلمات الفائقة وخيارات التدريب الإضافية. تتضمن إحدى خيارات التدريب هذه القدرة على دفع النموذج مباشرة إلى منصة Hub. قم بتعيين `push_to_hub=True` في [`TrainingArguments`]:
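إليك مثالًا مختصرًا لهذا الخيار (اسم دليل الإخراج هنا مجرد مثال، ويبقى باقي إعداد `Trainer` من درس التدريب كما هو):
```py
>>> from transformers import TrainingArguments

>>> training_args = TrainingArguments(output_dir="my-awesome-model", push_to_hub=True)
```
مع تفعيل هذا الخيار، يتكفل `Trainer` عادةً بإنشاء المستودع ورفع نقاط التفتيش عند حفظ النموذج، بالإضافة إلى الاستدعاء الصريح `trainer.push_to_hub()` كما هو موضح أدناه.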
@ -127,29 +99,6 @@ pip install huggingface_hub
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
شارك نموذجًا على Hub باستخدام [`PushToHubCallback`]. في دالة [`PushToHubCallback`], أضف:
- دليل إخراج لنموذجك.
- مُجزّئ اللغوي.
- `hub_model_id`، والذي هو اسم مستخدم Hub واسم النموذج الخاص بك.
```py
>>> from transformers import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="./your_model_save_path", tokenizer=tokenizer, hub_model_id="your-username/my-awesome-model"
... )
```
أضف الاستدعاء إلى [`fit`](https://keras.io/api/models/model_training_apis/)، وسيقوم 🤗 Transformers بدفع النموذج المدرب إلى Hub:
```py
>>> model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=3, callbacks=push_to_hub_callback)
```
</tf>
</frameworkcontent>
## استخدام دالة `push_to_hub`
@ -220,4 +169,4 @@ pip install huggingface_hub
* قم بإنشاء ملف `README.md` وتحميله يدويًا.
* انقر فوق الزر **Edit model card** في مستودع نموذجك.
الق نظرة على بطاقة [DistilBert](https://huggingface.co/distilbert/distilbert-base-uncased) للحصول على مثال جيد على نوع المعلومات التي يجب أن تتضمنها بطاقة النموذج. للحصول على مزيد من التفاصيل حول الخيارات الأخرى التي يمكنك التحكم فيها في ملف `README.md` مثل البصمة الكربونية للنموذج أو أمثلة الأداة، راجع الوثائق [هنا](https://huggingface.co/docs/hub/models-cards).
الق نظرة على بطاقة [DistilBert](https://huggingface.co/distilbert/distilbert-base-uncased) للحصول على مثال جيد على نوع المعلومات التي يجب أن تتضمنها بطاقة النموذج. للحصول على مزيد من التفاصيل حول الخيارات الأخرى التي يمكنك التحكم فيها في ملف `README.md` مثل البصمة الكربونية للنموذج أو أمثلة الأداة، راجع الوثائق [هنا](https://huggingface.co/docs/hub/models-cards).


@ -39,7 +39,6 @@
| [كيفية ضبط نموذج بدقة على التلخيص](https://github.com/huggingface/notebooks/blob/main/examples/summarization.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج مُدرَّب مسبقًا بدقة على XSUM. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization.ipynb)|
| [كيفية تدريب نموذج لغة من البداية](https://github.com/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| تسليط الضوء على جميع الخطوات لتدريب نموذج Transformer بشكل فعال على بيانات مخصصة | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/01_how_to_train.ipynb)|
| [كيفية إنشاء نص](https://github.com/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| كيفية استخدام أساليب فك التشفير المختلفة لإنشاء اللغة باستخدام المحولات | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/02_how_to_generate.ipynb)|
| [كيفية إنشاء نص (مع قيود)](https://github.com/huggingface/blog/blob/main/notebooks/53_constrained_beam_search.ipynb)| كيفية توجيه إنشاء اللغة باستخدام القيود التي يوفرها المستخدم | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/main/notebooks/53_constrained_beam_search.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/blog/blob/main/notebooks/53_constrained_beam_search.ipynb)|
| [Reformer](https://github.com/huggingface/blog/blob/main/notebooks/03_reformer.ipynb)| كيف يدفع Reformer حدود النمذجة اللغوية | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/blog/blob/main/notebooks/03_reformer.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/patrickvonplaten/blog/blob/main/notebooks/03_reformer.ipynb)|
#### رؤية الكمبيوتر[[pytorch-cv]]


@ -152,8 +152,6 @@ pip install datasets
قم بتعيين معلمة `return_tensors` إلى إما `pt` لـ PyTorch، أو `tf` لـ TensorFlow:
<frameworkcontent>
<pt>
```py
>>> batch_sentences = [
@ -173,33 +171,6 @@ pip install datasets
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
```
</pt>
<tf>
```py
>>> batch_sentences = [
... "But what about second breakfast?",
... "Don't think he knows about second breakfast, Pip.",
... "What about elevensies?",
... ]
>>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf")
>>> print(encoded_input)
{'input_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
[101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
[101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype=int32)>,
'token_type_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>,
'attention_mask': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>}
```
</tf>
</frameworkcontent>
<Tip>


@ -12,20 +12,10 @@
ستحتاج أيضًا إلى تثبيت إطار عمل التعلم الآلي المفضل لديك:
<frameworkcontent>
<pt>
```bash
pip install torch
```
</pt>
<tf>
```bash
pip install tensorflow
```
</tf>
</frameworkcontent>
## خط الأنابيب
@ -122,8 +112,6 @@ label: NEGATIVE, with score: 0.5309
>>> model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
```
<frameworkcontent>
<pt>
استخدم [`AutoModelForSequenceClassification`] و [`AutoTokenizer`] لتحميل النموذج المُدرب مسبقًا ومعالجته المرتبط به (مزيد من المعلومات حول `AutoClass` في القسم التالي):
```py
@ -132,18 +120,6 @@ label: NEGATIVE, with score: 0.5309
>>> model = AutoModelForSequenceClassification.from_pretrained(model_name)
>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
```
</pt>
<tf>
استخدم [`TFAutoModelForSequenceClassification`] و [`AutoTokenizer`] لتحميل النموذج المُدرب مسبقًا ومعالجته المرتبط به (مزيد من المعلومات حول `TFAutoClass` في القسم التالي):
```py
>>> from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
>>> tokenizer = AutoTokenizer.from_pretrained(model_name)
```
</tf>
</frameworkcontent>
حدد النموذج والمعالج في [`pipeline`]. الآن يمكنك تطبيق `classifier` على النص الفرنسي:
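إليك مثالًا مختصرًا لهذه الخطوة بإعادة استخدام `model` و`tokenizer` المُحمَّلين أعلاه (الجملة الفرنسية هنا مجرد مدخل توضيحي):
```py
>>> from transformers import pipeline

>>> classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
>>> classifier("Nous sommes très heureux de vous présenter la bibliothèque 🤗 Transformers.")
```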
@ -192,8 +168,6 @@ label: NEGATIVE, with score: 0.5309
يمكن للمُجزئ أيضًا قبول قائمة من المدخلات، ويقوم بحشو النص وتقصيره لإرجاعه كدفعة بطول موحد:
<frameworkcontent>
<pt>
```py
>>> pt_batch = tokenizer(
@ -204,20 +178,6 @@ label: NEGATIVE, with score: 0.5309
... return_tensors="pt",
... )
```
</pt>
<tf>
```py
>>> tf_batch = tokenizer(
... ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
... padding=True,
... truncation=True,
... max_length=512,
... return_tensors="tf",
... )
```
</tf>
</frameworkcontent>
<Tip>
@ -227,8 +187,6 @@ label: NEGATIVE, with score: 0.5309
### AutoModel
<frameworkcontent>
<pt>
تقدم مكتبة 🤗 Transformers طريقة بسيطة وموحدة لتحميل نماذج مدربة مسبقًا. وهذا يعني أنه يمكنك تحميل [`AutoModel`] كما لو كنت تقوم بتحميل [`AutoTokenizer`]. الفرق الوحيد هو اختيار فئة [`AutoModel`] المناسبة للمهمة. بالنسبة لتصنيف النص (أو التسلسل)، يجب عليك تحميل [`AutoModelForSequenceClassification`]:
```py
@ -264,39 +222,6 @@ label: NEGATIVE, with score: 0.5309
tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
[0.2084, 0.1826, 0.1969, 0.1755, 0.2365]], grad_fn=<SoftmaxBackward0>)
```
</pt>
<tf>
يوفر 🤗 Transformers طريقة بسيطة وموحدة لتحميل مثيلات مُدربة مسبقًا. وهذا يعني أنه يمكنك تحميل [`TFAutoModel`] مثل تحميل [`AutoTokenizer`]. والفرق الوحيد هو تحديد [`TFAutoModel`] الصحيح للمهمة. للتصنيف النصي (أو التسلسلي)، يجب تحميل [`TFAutoModelForSequenceClassification`]:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
```
<Tip>
راجع [ملخص المهام](./task_summary) للمهام المدعومة بواسطة فئة [`AutoModel`].
</Tip>
الآن، مرر دفعة المدخلات المعالجة مسبقًا مباشرة إلى النموذج. يمكنك تمرير المصفوفات كما هي:
```py
>>> tf_outputs = tf_model(tf_batch)
```
يقوم النموذج بإخراج التنشيطات النهائية في سمة `logits`. طبق دالة softmax على `logits` لاسترداد الاحتمالات:
```py
>>> import tensorflow as tf
>>> tf_predictions = tf.nn.softmax(tf_outputs.logits, axis=-1)
>>> tf_predictions # doctest: +IGNORE_RESULT
```
</tf>
</frameworkcontent>
<Tip>
@ -306,8 +231,6 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
### حفظ النموذج
<frameworkcontent>
<pt>
بمجرد ضبط نموذجك، يمكنك حفظه مع برنامج الترميز الخاص به باستخدام [`PreTrainedModel.save_pretrained`]:
```py
@ -321,28 +244,9 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
```py
>>> pt_model = AutoModelForSequenceClassification.from_pretrained("./pt_save_pretrained")
```
</pt>
<tf>
بمجرد ضبط نموذجك، يمكنك حفظه مع برنامج الترميز الخاص به باستخدام [`TFPreTrainedModel.save_pretrained`]:
```py
>>> tf_save_directory = "./tf_save_pretrained"
>>> tokenizer.save_pretrained(tf_save_directory) # doctest: +IGNORE_RESULT
>>> tf_model.save_pretrained(tf_save_directory)
```
عندما تكون مستعدًا لاستخدام النموذج مرة أخرى، أعد تحميله باستخدام [`TFPreTrainedModel.from_pretrained`]:
```py
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained("./tf_save_pretrained")
```
</tf>
</frameworkcontent>
من الميزات الرائعة في 🤗 Transformers القدرة على حفظ نموذج وإعادة تحميله كنموذج PyTorch أو TensorFlow. يمكن أن يحول معامل `from_pt` أو `from_tf` النموذج من إطار عمل إلى آخر:
<frameworkcontent>
<pt>
```py
>>> from transformers import AutoModel
@ -350,17 +254,6 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
>>> tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)
>>> pt_model = AutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)
```
</pt>
<tf>
```py
>>> from transformers import TFAutoModel
>>> tokenizer = AutoTokenizer.from_pretrained(tf_save_directory)
>>> tf_model = TFAutoModelForSequenceClassification.from_pretrained(tf_save_directory, from_tf=True)
```
</tf>
</frameworkcontent>
## إنشاء نماذج مخصصة
@ -375,8 +268,6 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
>>> my_config = AutoConfig.from_pretrained("distilbert/distilbert-base-uncased", n_heads=12)
```
<frameworkcontent>
<pt>
قم بإنشاء نموذج من تكوينك المخصص باستخدام [`AutoModel.from_config`]:
```py
@ -384,17 +275,6 @@ tensor([[0.0021, 0.0018, 0.0115, 0.2121, 0.7725],
>>> my_model = AutoModel.from_config(my_config)
```
</pt>
<tf>
قم بإنشاء نموذج من تكوينك المخصص باستخدام [`TFAutoModel.from_config`]:
```py
>>> from transformers import TFAutoModel
>>> my_model = TFAutoModel.from_config(my_config)
```
</tf>
</frameworkcontent>
الق نظرة على دليل [إنشاء بنية مخصصة](./create_a_model) لمزيد من المعلومات حول بناء التكوينات المخصصة.


@ -76,8 +76,6 @@ pip install -r requirements.txt
## تشغيل نص برمجي
<frameworkcontent>
<pt>
- يقوم النص البرمجي التوضيحي بتنزيل مجموعة بيانات ومعالجتها مسبقًا من مكتبة 🤗 [Datasets](https://huggingface.co/docs/datasets).
- ثم يقوم النص البرمجي بضبط نموذج بيانات دقيق باستخدام [Trainer](https://huggingface.co/docs/transformers/main_classes/trainer) على بنية تدعم الملخص.
@ -95,31 +93,8 @@ python examples/pytorch/summarization/run_summarization.py \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--overwrite_output_dir \
--predict_with_generate
```
</pt>
<tf>
- يقوم النص البرمجي التوضيحي بتنزيل مجموعة بيانات ومعالجتها مسبقًا من مكتبة 🤗 [Datasets](https://huggingface.co/docs/datasets/).
- ثم يقوم النص البرمجي بضبط نموذج بيانات دقيق باستخدام Keras على بنية تدعم الملخص.
- يوضح المثال التالي كيفية ضبط نموذج [T5-small](https://huggingface.co/google-t5/t5-small) على مجموعة بيانات [CNN/DailyMail](https://huggingface.co/datasets/cnn_dailymail).
- يتطلب نموذج T5 ماعمل `source_prefix` إضافية بسبب الطريقة التي تم تدريبه بها. يتيح هذا المطالبة لـ T5 معرفة أن هذه مهمة التلخيص.
```bash
python examples/tensorflow/summarization/run_summarization.py \
--model_name_or_path google-t5/t5-small \
--dataset_name cnn_dailymail \
--dataset_config "3.0.0" \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 16 \
--num_train_epochs 3 \
--do_train \
--do_eval
```
</tf>
</frameworkcontent>
## التدريب الموزع والدقة المختلطة
@ -141,7 +116,6 @@ torchrun \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--overwrite_output_dir \
--predict_with_generate
```
@ -149,8 +123,6 @@ torchrun \
## تشغيل نص برمجي على وحدة معالجة الدقة الفائقة (TPU)
<frameworkcontent>
<pt>
تُعد وحدات معالجة الدقة الفائقة (TPUs) مصممة خصيصًا لتسريع الأداء. يدعم PyTorch وحدات معالجة الدقة الفائقة (TPUs) مع [XLA](https://www.tensorflow.org/xla) مجمع الدقة الفائقة للتعلم العميق (راجع [هنا](https://github.com/pytorch/xla/blob/master/README.md) لمزيد من التفاصيل). لاستخدام وحدة معالجة الدقة الفائقة (TPU)، قم بتشغيل نص `xla_spawn.py` البرمجي واستخدم معامل `num_cores` لتعيين عدد وحدات معالجة الدقة الفائقة (TPU) التي تريد استخدامها.
@ -166,28 +138,8 @@ python xla_spawn.py --num_cores 8 \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--overwrite_output_dir \
--predict_with_generate
```
</pt>
<tf>
تُعد وحدات معالجة الدقة الفائقة (TPUs) مصممة خصيصًا لتسريع الأداء. تستخدم نصوص TensorFlow البرمجية استراتيجية [`TPUStrategy`](https://www.tensorflow.org/guide/distributed_training#tpustrategy) للتدريب على وحدات معالجة الدقة الفائقة (TPUs). لاستخدام وحدة معالجة الدقة الفائقة (TPU)، قم بتمرير اسم مورد وحدة معالجة الدقة الفائقة (TPU) إلى حجة `tpu`.
```bash
python run_summarization.py \
--tpu name_of_tpu_resource \
--model_name_or_path google-t5/t5-small \
--dataset_name cnn_dailymail \
--dataset_config "3.0.0" \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 16 \
--num_train_epochs 3 \
--do_train \
--do_eval
```
</tf>
</frameworkcontent>
## تشغيل نص برمجي باستخدام 🤗 Accelerate
@ -242,7 +194,6 @@ python examples/pytorch/summarization/run_summarization.py \
--summary_column summary_column_name \
--source_prefix "summarize: " \
--output_dir /tmp/tst-summarization \
--overwrite_output_dir \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--predict_with_generate
@ -270,7 +221,6 @@ python examples/pytorch/summarization/run_summarization.py \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--overwrite_output_dir \
--predict_with_generate
```
@ -284,8 +234,6 @@ examples/pytorch/summarization/run_summarization.py -h
خيار آخر مفيد لتمكينه هو استئناف التدريب من نقطة تفتيش سابقة. سيضمن ذلك أنك تستطيع الاستمرار من حيث توقفت دون البدء من جديد إذا تم مقاطعة تدريبك. هناك طريقتان لاستئناف التدريب من نقطة تفتيش.
تستخدم الطريقة الأولى المعلمة `output_dir previous_output_dir` لاستئناف التدريب من أحدث نقطة تفتيش مخزنة في `output_dir`. في هذه الحالة، يجب عليك إزالة `overwrite_output_dir`:
```bash
python examples/pytorch/summarization/run_summarization.py
--model_name_or_path google-t5/t5-small \
@ -297,24 +245,6 @@ python examples/pytorch/summarization/run_summarization.py
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--output_dir previous_output_dir \
--predict_with_generate
```
تستخدم الطريقة الثانية معلمة `resume_from_checkpoint path_to_specific_checkpoint` لاستئناف التدريب من مجلد نقطة تفتيش محددة.
```bash
python examples/pytorch/summarization/run_summarization.py
--model_name_or_path google-t5/t5-small \
--do_train \
--do_eval \
--dataset_name cnn_dailymail \
--dataset_config "3.0.0" \
--source_prefix "summarize: " \
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--overwrite_output_dir \
--resume_from_checkpoint path_to_specific_checkpoint \
--predict_with_generate
```
@ -346,6 +276,5 @@ python examples/pytorch/summarization/run_summarization.py
--output_dir /tmp/tst-summarization \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=4 \
--overwrite_output_dir \
--predict_with_generate
```


@ -182,8 +182,6 @@ pip install transformers datasets evaluate
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorForLanguageModeling`]. من الأفضل أن تقوم بـ *الحشو الديناميكي* للجمل إلى الطول الأطول في الدفعة أثناء التجميع، بدلاً من حشو كامل المجموعة من البيانات إلى الطول الأقصى.
<frameworkcontent>
<pt>
استخدم رمز نهاية التسلسل كرمز للحشو، وحدد `mlm_probability` لحجب الرموز بشكل عشوائي عند كل تكرار للبيانات:
```py
@ -193,23 +191,9 @@ pip install transformers datasets evaluate
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
```
</pt>
<tf>
استخدم رمز نهاية التسلسل كرمز للحشو، وحدد `mlm_probability` لحجب الرموز بشكل عشوائي عند كل تكرار للبيانات:
```py
>>> from transformers import DataCollatorForLanguageModeling
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
@ -267,75 +251,6 @@ Perplexity: 49.61
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن على دراية بتدريب نموذج باستخدام Keras، اطلع على [البرنامج التعليمي الأساسي](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لتدريب نموذج في TensorFlow، ابدأ بإعداد دالة المحسن، وجدول معدل التعلم، وبعض معاملات التدريب:
```py
>>> from transformers import create_optimizer, AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
ثم يمكنك تحميل DistilGPT2 باستخدام [`TFAutoModelForCausalLM`]:
```py
>>> from transformers import TFAutoModelForCausalLM
>>> model = TFAutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
```
حول مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... lm_dataset["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... lm_dataset["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers لديها دالة خسارة ذات صلة بالمهمة الافتراضية، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # لا يوجد حجة للخسارة!
```
يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومجمّع البيانات في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> callback = PushToHubCallback(
... output_dir="my_awesome_eli5_clm-model",
... tokenizer=tokenizer,
... )
```
أخيراً، أنت جاهز لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة، وعدد العصور، والتعليقات الخاصة بك لتدريب النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=[callback])
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
@ -365,8 +280,6 @@ Perplexity: 49.61
[{'generated_text': "Somatic hypermutation allows the immune system to be able to effectively reverse the damage caused by an infection.\n\n\nThe damage caused by an infection is caused by the immune system's ability to perform its own self-correcting tasks."}]
```
<frameworkcontent>
<pt>
قسم النص وإرجع `input_ids` كتنسورات PyTorch:
```py
@ -392,31 +305,3 @@ Perplexity: 49.61
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
["Somatic hypermutation allows the immune system to react to drugs with the ability to adapt to a different environmental situation. In other words, a system of 'hypermutation' can help the immune system to adapt to a different environmental situation or in some cases even a single life. In contrast, researchers at the University of Massachusetts-Boston have found that 'hypermutation' is much stronger in mice than in humans but can be found in humans, and that it's not completely unknown to the immune system. A study on how the immune system"]
```
</pt>
<tf>
قم بتقسيم النص وإرجاع `input_ids` كـ TensorFlow tensors:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_clm-model")
>>> inputs = tokenizer(prompt, return_tensors="tf").input_ids
```
استخدم طريقة [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] لإنشاء الملخص. للمزيد من التفاصيل حول استراتيجيات توليد النص المختلفة والبارامترات للتحكم في التوليد، راجع صفحة [استراتيجيات توليد النص](../generation_strategies).
```py
>>> from transformers import TFAutoModelForCausalLM
>>> model = TFAutoModelForCausalLM.from_pretrained("username/my_awesome_eli5_clm-model")
>>> outputs = model.generate(input_ids=inputs, max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
```
فك ترميز الرموز المولدة مرة أخرى إلى نص:
```py
>>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
['Somatic hypermutation allows the immune system to detect the presence of other viruses as they become more prevalent. Therefore, researchers have identified a high proportion of human viruses. The proportion of virus-associated viruses in our study increases with age. Therefore, we propose a simple algorithm to detect the presence of these new viruses in our samples as a sign of improved immunity. A first study based on this algorithm, which will be published in Science on Friday, aims to show that this finding could translate into the development of a better vaccine that is more effective for']
```
</tf>
</frameworkcontent>


@ -176,8 +176,6 @@ pip install transformers datasets evaluate
الآن، قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorForLanguageModeling`]. من الأكثر كفاءة أن تقوم بـ *الحشو الديناميكي* ليصل طولها إلى أطول جملة في الدفعة أثناء التجميع، بدلاً من حشو مجموعة البيانات بأكملها إلى الطول الأقصى.
<frameworkcontent>
<pt>
استخدم رمز نهاية التسلسل كرمز الحشو وحدد `mlm_probability` لحجب الرموز عشوائياً كل مرة تكرر فيها البيانات:
@ -187,23 +185,9 @@ pip install transformers datasets evaluate
>>> tokenizer.pad_token = tokenizer.eos_token
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)
```
</pt>
<tf>
استخدم رمز نهاية التسلسل كرمز الحشو وحدد `mlm_probability` لحجب الرموز عشوائياً كل مرة تكرر فيها البيانات:
```py
>>> from transformers import DataCollatorForLanguageModeling
>>> data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
@ -263,75 +247,6 @@ Perplexity: 8.76
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن على دراية بتعديل نموذج باستخدام Keras، ألق نظرة على الدليل الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لتعديل نموذج في TensorFlow، ابدأ بإعداد دالة محسن، وجدول معدل التعلم، وبعض معلمات التدريب:
```py
>>> from transformers import create_optimizer, AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
ثم يمكنك تحميل DistilRoBERTa باستخدام [`TFAutoModelForMaskedLM`]:
```py
>>> from transformers import TFAutoModelForMaskedLM
>>> model = TFAutoModelForMaskedLM.from_pretrained("distilbert/distilroberta-base")
```
قم بتحويل مجموعات بياناتك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... lm_dataset["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... lm_dataset["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن نماذج Transformers لديها جميعها دالة خسارة افتراضية ذات صلة بالمهمة، لذلك لا تحتاج إلى تحديد واحدة ما لم تكن تريد ذلك:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # لا توجد حجة للخسارة!
```
يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومعالج الرموز في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> callback = PushToHubCallback(
... output_dir="my_awesome_eli5_mlm_model",
... tokenizer=tokenizer,
... )
```
أخيراً، أنت مستعد لبدء تدريب نموذجك! قم باستدعاء [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق، وعدد العصور، والتعليقات الخاصة بك لتعديل النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=[callback])
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائياً إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
@ -372,8 +287,6 @@ Perplexity: 8.76
'sequence': 'The Milky Way is a small galaxy.'}]
```
<frameworkcontent>
<pt>
قم بتجزئة النص وإرجاع `input_ids` كمتجهات PyTorch. ستحتاج أيضًا إلى تحديد موضع رمز `<mask>`:
```py
@ -405,38 +318,3 @@ The Milky Way is a spiral galaxy.
The Milky Way is a massive galaxy.
The Milky Way is a small galaxy.
```
</pt>
<tf>
قم بتقسيم النص إلى رموز وإرجاع `input_ids` كـ TensorFlow tensors. ستحتاج أيضًا إلى تحديد موضع رمز `<mask>`:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_eli5_mlm_model")
>>> inputs = tokenizer(text, return_tensors="tf")
>>> mask_token_index = tf.where(inputs["input_ids"] == tokenizer.mask_token_id)[0, 1]
```
قم بتمرير المدخلات إلى النموذج وإرجاع `logits` للرمز المقنع:
```py
>>> from transformers import TFAutoModelForMaskedLM
>>> model = TFAutoModelForMaskedLM.from_pretrained("username/my_awesome_eli5_mlm_model")
>>> logits = model(**inputs).logits
>>> mask_token_logits = logits[0, mask_token_index, :]
```
ثم قم بإرجاع الرموز الثلاثة المقنعة ذات الاحتمالية الأعلى وطباعتها:
```py
>>> top_3_tokens = tf.math.top_k(mask_token_logits, 3).indices.numpy()
>>> for token in top_3_tokens:
... print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))
The Milky Way is a spiral galaxy.
The Milky Way is a massive galaxy.
The Milky Way is a small galaxy.
```
</tf>
</frameworkcontent>


@ -116,8 +116,6 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
يقوم `DataCollatorForMultipleChoice` بتجميع جميع مدخلات النموذج، ويطبق الحشو، ثم يعيد تجميع النتائج في شكلها الأصلي:
<frameworkcontent>
<pt>
```py
>>> from dataclasses import dataclass
@ -158,50 +156,6 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
... batch["labels"] = torch.tensor(labels, dtype=torch.int64)
... return batch
```
</pt>
<tf>
```py
>>> from dataclasses import dataclass
>>> from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
>>> from typing import Optional, Union
>>> import tensorflow as tf
>>> @dataclass
... class DataCollatorForMultipleChoice:
... """
... Data collator that will dynamically pad the inputs for multiple choice received.
... """
... tokenizer: PreTrainedTokenizerBase
... padding: Union[bool, str, PaddingStrategy] = True
... max_length: Optional[int] = None
... pad_to_multiple_of: Optional[int] = None
... def __call__(self, features):
... label_name = "label" if "label" in features[0].keys() else "labels"
... labels = [feature.pop(label_name) for feature in features]
... batch_size = len(features)
... num_choices = len(features[0]["input_ids"])
... flattened_features = [
... [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
... ]
... flattened_features = sum(flattened_features, [])
... batch = self.tokenizer.pad(
... flattened_features,
... padding=self.padding,
... max_length=self.max_length,
... pad_to_multiple_of=self.pad_to_multiple_of,
... return_tensors="tf",
... )
... batch = {k: tf.reshape(v, (batch_size, num_choices, -1)) for k, v in batch.items()}
... batch["labels"] = tf.convert_to_tensor(labels, dtype=tf.int64)
... return batch
```
</tf>
</frameworkcontent>
## التقييم (Evaluate)
@ -228,8 +182,6 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
@ -283,93 +235,6 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فراجع الدرس الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن وجدول معدل التعلم وبعض معلمات التدريب:
```py
>>> from transformers import create_optimizer
>>> batch_size = 16
>>> num_train_epochs = 2
>>> total_train_steps = (len(tokenized_swag["train"]) // batch_size) * num_train_epochs
>>> optimizer, schedule = create_optimizer(init_lr=5e-5, num_warmup_steps=0, num_train_steps=total_train_steps)
```
ثم يمكنك تحميل BERT باستخدام [`TFAutoModelForMultipleChoice`]:
```py
>>> from transformers import TFAutoModelForMultipleChoice
>>> model = TFAutoModelForMultipleChoice.from_pretrained("google-bert/bert-base-uncased")
```
حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> data_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_swag["train"],
... shuffle=True,
... batch_size=batch_size,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_swag["validation"],
... shuffle=False,
... batch_size=batch_size,
... collate_fn=data_collator,
... )
```
قم بتهيئة النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). لاحظ أن جميع نماذج Transformers تحتوي على دالة خسارة مناسبة للمهمة بشكل افتراضي، لذلك لا تحتاج إلى تحديد واحدة ما لم ترغب في ذلك:
```py
>>> model.compile(optimizer=optimizer) # لا توجد وسيطة خسارة!
```
الخطوتان الأخيرتان قبل بدء التدريب هما: حساب دقة التنبؤات، وتوفير طريقة لرفع النموذج إلى Hub. ويمكن تحقيق ذلك باستخدام [استدعاءات Keras](../main_classes/keras_callbacks)
مرر دالتك `compute_metrics` إلى [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
```
حدد مكان دفع نموذجك ومعالجك في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_model",
... tokenizer=tokenizer,
... )
```
ثم قم بتضمين الاستدعاءات معًا:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
أخيرًا، أنت جاهز لبدء تدريب نموذجك! استدعِ[`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة وعدد الحقب والاستدعاءات لضبط النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=2, callbacks=callbacks)
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
@ -390,8 +255,6 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
>>> candidate2 = "The law applies to baguettes."
```
<frameworkcontent>
<pt>
قم بتحليل كل مطالبة وزوج إجابة مرشح وأعد تنسورات PyTorch. يجب عليك أيضًا إنشاء بعض `العلامات`:
```py
@ -419,34 +282,3 @@ tokenized_swag = swag.map(preprocess_function, batched=True)
>>> predicted_class
0
```
</pt>
<tf>
قم بتحليل كل مطالبة وزوج إجابة مرشح وأعد موترات TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_swag_model")
>>> inputs = tokenizer([[prompt, candidate1], [prompt, candidate2]], return_tensors="tf", padding=True)
```
مرر مدخلاتك إلى النموذج وأعد القيم logits:
```py
>>> from transformers import TFAutoModelForMultipleChoice
>>> model = TFAutoModelForMultipleChoice.from_pretrained("username/my_awesome_swag_model")
>>> inputs = {k: tf.expand_dims(v, 0) for k, v in inputs.items()}
>>> outputs = model(inputs)
>>> logits = outputs.logits
```
استخرج الفئة ذات الاحتمالية الأكبر:
```py
>>> predicted_class = int(tf.math.argmax(logits, axis=-1)[0])
>>> predicted_class
0
```
</tf>
</frameworkcontent>


@ -167,29 +167,15 @@ pip install transformers datasets evaluate
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DefaultDataCollator`]. بخلاف مجمّعات البيانات الأخرى في 🤗 Transformers، لا يطبق [`DefaultDataCollator`] أي معالجة مسبقة إضافية مثل الحشو.
<frameworkcontent>
<pt>
```py
>>> from transformers import DefaultDataCollator
>>> data_collator = DefaultDataCollator()
```
</pt>
<tf>
```py
>>> from transformers import DefaultDataCollator
>>> data_collator = DefaultDataCollator(return_tensors="tf")
```
</tf>
</frameworkcontent>
## التدريب (Train)
<frameworkcontent>
<pt>
<Tip>
@ -240,82 +226,6 @@ pip install transformers datasets evaluate
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
إذا لم تكن معتادًا على ضبط نموذج باستخدام Keras، فألق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-a-tensorflow-model-with-keras)!
</Tip>
لضبط نموذج في TensorFlow، ابدأ بإعداد دالة مُحسِّن، وجدول معدل التعلم، وبعض المعاملات الفائقة للتدريب:
```py
>>> from transformers import create_optimizer
>>> batch_size = 16
>>> num_epochs = 2
>>> total_train_steps = (len(tokenized_squad["train"]) // batch_size) * num_epochs
>>> optimizer, schedule = create_optimizer(
... init_lr=2e-5,
... num_warmup_steps=0,
... num_train_steps=total_train_steps,
... )
```
ثم يمكنك تحميل DistilBERT باستخدام [`TFAutoModelForQuestionAnswering`]:
```py
>>> from transformers import TFAutoModelForQuestionAnswering
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased")
```
حوّل مجموعات البيانات الخاصة بك إلى تنسيق `tf.data.Dataset` باستخدام [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_squad["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_squad["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
قم بتكوين النموذج للتدريب باستخدام [`compile`](https://keras.io/api/models/model_training_apis/#compile-method):
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer)
```
آخر شيء يجب إعداده قبل بدء التدريب هو توفير طريقة لدفع نموذجك إلى Hub. يمكن القيام بذلك عن طريق تحديد مكان دفع نموذجك ومعالجك المعجمي في [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> callback = PushToHubCallback(
... output_dir="my_awesome_qa_model",
... tokenizer=tokenizer,
... )
```
أخيرًا، أنت جاهز لبدء تدريب نموذجك! اتصل بـ [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) مع مجموعات بيانات التدريب والتحقق من الصحة، وعدد العهود، ومعاودة الاتصال الخاصة بك لضبط النموذج:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=[callback])
```
بمجرد اكتمال التدريب، يتم تحميل نموذجك تلقائيًا إلى Hub حتى يتمكن الجميع من استخدامه!
</tf>
</frameworkcontent>
<Tip>
@ -357,8 +267,6 @@ pip install transformers datasets evaluate
يمكنك أيضًا تكرار نتائج `pipeline` يدويًا إذا أردت:
<frameworkcontent>
<pt>
قسّم النص وأرجع تنسورات PyTorch:
@ -394,39 +302,3 @@ pip install transformers datasets evaluate
>>> tokenizer.decode(predict_answer_tokens)
'176 billion parameters and can generate text in 46 languages natural languages and 13'
```
</pt>
<tf>
قم بتحليل النص المعجمي وأعد موترات TensorFlow:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
>>> inputs = tokenizer(question, context, return_tensors="tf")
```
مرر مدخلاتك إلى النموذج وأعد `logits`:
```py
>>> from transformers import TFAutoModelForQuestionAnswering
>>> model = TFAutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
>>> outputs = model(**inputs)
```
احصل على أعلى احتمال من مخرجات النموذج لموضعي البداية والنهاية:
```py
>>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
>>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
```
استخلاص الإجابة من الرموز المتوقعة:
```py
>>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
>>> tokenizer.decode(predict_answer_tokens)
'176 billion parameters and can generate text in 46 languages natural languages and 13'
```
</tf>
</frameworkcontent>


@ -92,24 +92,12 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True)
الآن قم بإنشاء دفعة من الأمثلة باستخدام [`DataCollatorWithPadding`]. الأكثر كفاءة هو استخدام الحشو الديناميكي لجعل الجمل متساوية في الطول داخل كل دفعة، بدلًا من حشو كامل البيانات إلى الحد الأقصى للطول.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorWithPadding
>>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorWithPadding
>>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
```
</tf>
</frameworkcontent>
## التقييم(Evaluate)
@ -143,8 +131,6 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True)
>>> label2id = {"NEGATIVE": 0, "POSITIVE": 1}
```
<frameworkcontent>
<pt>
<Tip>
إذا لم تكن على دراية بضبط نموذج دقيق باستخدام [`Trainer`], فالق نظرة على البرنامج التعليمي الأساسي [هنا](../training#train-with-pytorch-trainer)!
@ -205,98 +191,6 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True)
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
If you aren't familiar with fine-tuning a model with Keras, take a look at the basic tutorial [here](../training#train-a-tensorflow-model-with-keras)!
</Tip>
To fine-tune a model in TensorFlow, start by setting up an optimizer function, a learning rate schedule, and some training hyperparameters:
```py
>>> from transformers import create_optimizer
>>> import tensorflow as tf
>>> batch_size = 16
>>> num_epochs = 5
>>> batches_per_epoch = len(tokenized_imdb["train"]) // batch_size
>>> total_train_steps = int(batches_per_epoch * num_epochs)
>>> optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps)
```
Then you can load DistilBERT with [`TFAutoModelForSequenceClassification`] along with the number of expected labels and the label mappings:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained(
... "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
... )
```
Convert your datasets to the `tf.data.Dataset` format with [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_imdb["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_imdb["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
Configure the model for training with [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). Note that all Transformers models have a default task-relevant loss function, so you don't need to specify one unless you want to:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
The last two things to set up before you start training are computing the accuracy from the predictions, and providing a way to push your model to the Hub. Both are done with [Keras callbacks](../main_classes/keras_callbacks).
Pass your `compute_metrics` function to [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
```
Specify where to push your model and tokenizer in [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_model",
... tokenizer=tokenizer,
... )
```
Then bundle your callbacks together:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
Finally, you're ready to start training your model! Call [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) with your training and validation datasets, the number of epochs, and your callbacks to fine-tune the model:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)
```
Once training completes, your model is automatically uploaded to the Hub so everyone can use it!
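As a quick sanity check, a minimal [`pipeline`] sketch (assuming the `stevhliu/my_awesome_model` checkpoint referenced in the inference examples below, with an illustrative review text):
```py
from transformers import pipeline

# illustrative input text
text = "This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end."

classifier = pipeline("sentiment-analysis", model="stevhliu/my_awesome_model")
classifier(text)
```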
</tf>
</frameworkcontent>
<Tip>
@ -328,8 +222,6 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True)
You can also manually replicate the results of the `pipeline` if you'd like:
<frameworkcontent>
<pt>
Tokenize the text and return PyTorch tensors:
```py
@ -356,32 +248,3 @@ tokenized_imdb = imdb.map(preprocess_function, batched=True)
>>> model.config.id2label[predicted_class_id]
'POSITIVE'
```
</pt>
<tf>
Tokenize the text and return TensorFlow tensors:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_model")
>>> inputs = tokenizer(text, return_tensors="tf")
```
Pass your inputs to the model and return the `logits`:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained("stevhliu/my_awesome_model")
>>> logits = model(**inputs).logits
```
Get the class with the highest probability, and use the model's `id2label` mapping to convert it to a text label:
```py
>>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
>>> model.config.id2label[predicted_class_id]
'POSITIVE'
```
</tf>
</frameworkcontent>


@ -118,24 +118,12 @@ pip install transformers datasets evaluate rouge_score
Now create a batch of examples using [`DataCollatorForSeq2Seq`]. It's more efficient to *dynamically pad* the sentences to the longest length in a batch during collation, rather than padding the whole dataset to the maximum length.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorForSeq2Seq
>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorForSeq2Seq
>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
```
</tf>
</frameworkcontent>
## Evaluate
@ -170,8 +158,6 @@ pip install transformers datasets evaluate rouge_score
## Train
<frameworkcontent>
<pt>
<Tip>
@ -226,91 +212,6 @@ pip install transformers datasets evaluate rouge_score
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
If you aren't familiar with fine-tuning a model with Keras, take a look at the basic tutorial [here](../training#train-a-tensorflow-model-with-keras)!
</Tip>
To fine-tune a model in TensorFlow, start by setting up an optimizer function, a learning rate schedule, and some training hyperparameters:
```py
>>> from transformers import create_optimizer, AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
Then you can load T5 with [`TFAutoModelForSeq2SeqLM`]:
```py
>>> from transformers import TFAutoModelForSeq2SeqLM
>>> model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
```
Convert your datasets to the `tf.data.Dataset` format with [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_billsum["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... tokenized_billsum["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
Configure the model for training with [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). Note that all Transformers models have a default task-relevant loss function, so you don't need to specify one unless you want to:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
The last two things to set up before you start training are computing the ROUGE score from the predictions, and providing a way to push your model to the Hub. Both are done with [Keras callbacks](../main_classes/keras_callbacks).
Pass your `compute_metrics` function to [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_test_set)
```
Specify where to push your model and tokenizer in [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_billsum_model",
... tokenizer=tokenizer,
... )
```
Then bundle your callbacks together:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
Finally, you're ready to start training your model! Call [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) with your training and validation datasets, the number of epochs, and your callbacks to fine-tune the model:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=callbacks)
```
Once training completes, your model is automatically uploaded to the Hub so everyone can use it!
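A minimal [`pipeline`] sketch for trying the pushed checkpoint (assuming the `username/my_awesome_billsum_model` name used in the inference examples below; the text is a shortened placeholder and keeps the `summarize:` prefix T5 expects):
```py
from transformers import pipeline

# shortened placeholder text; T5 checkpoints expect the "summarize: " prefix
text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs."

summarizer = pipeline("summarization", model="username/my_awesome_billsum_model")
summarizer(text)
```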
</tf>
</frameworkcontent>
<Tip>
@ -341,8 +242,6 @@ pip install transformers datasets evaluate rouge_score
You can also manually replicate the results of the `pipeline` if you'd like:
<frameworkcontent>
<pt>
Tokenize the text and return the `input_ids` as PyTorch tensors:
```py
@ -367,31 +266,3 @@ pip install transformers datasets evaluate rouge_score
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
'the inflation reduction act lowers prescription drug costs, health care costs, and energy costs. it's the most aggressive action on tackling the climate crisis in american history. it will ask the ultra-wealthy and corporations to pay their fair share.'
```
</pt>
<tf>
Tokenize the text and return the `input_ids` as TensorFlow tensors:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_billsum_model")
>>> inputs = tokenizer(text, return_tensors="tf").input_ids
```
Use the [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] method to create the summarization. For more details about the different text generation strategies and parameters for controlling generation, check out the [Text generation](../main_classes/text_generation) API.
```py
>>> from transformers import TFAutoModelForSeq2SeqLM
>>> model = TFAutoModelForSeq2SeqLM.from_pretrained("username/my_awesome_billsum_model")
>>> outputs = model.generate(inputs, max_new_tokens=100, do_sample=False)
```
Decode the generated token ids back into text:
```py
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
'the inflation reduction act lowers prescription drug costs, health care costs, and energy costs. it's the most aggressive action on tackling the climate crisis in american history. it will ask the ultra-wealthy and corporations to pay their fair share.'
```
</tf>
</frameworkcontent>


@ -151,22 +151,11 @@ pip install transformers datasets evaluate seqeval
Now create a batch of examples using [`DataCollatorForTokenClassification`] (as used in the code below). It's more efficient to *dynamically pad* the sentences to the longest length in a batch during collation, rather than padding the whole dataset to the maximum length.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorForTokenClassification
>>> data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorForTokenClassification
>>> data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")
```
</tf>
</frameworkcontent>
## Evaluate
@ -246,8 +235,6 @@ pip install transformers datasets evaluate seqeval
... }
```
<frameworkcontent>
<pt>
<Tip>
If you aren't familiar with fine-tuning a model with [`Trainer`], take a look at the basic tutorial [here](../training#train-with-pytorch-trainer)!
@ -302,101 +289,6 @@ pip install transformers datasets evaluate seqeval
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
If you aren't familiar with fine-tuning a model with Keras, take a look at the basic tutorial [here](../training#train-a-tensorflow-model-with-keras)!
</Tip>
To fine-tune a model in TensorFlow, start by setting up an optimizer function, a learning rate schedule, and some training hyperparameters:
```py
>>> from transformers import create_optimizer
>>> batch_size = 16
>>> num_train_epochs = 3
>>> num_train_steps = (len(tokenized_wnut["train"]) // batch_size) * num_train_epochs
>>> optimizer, lr_schedule = create_optimizer(
... init_lr=2e-5,
... num_train_steps=num_train_steps,
... weight_decay_rate=0.01,
... num_warmup_steps=0,
... )
```
Then you can load DistilBERT with [`TFAutoModelForTokenClassification`] along with the number of expected labels and the label mappings:
```py
>>> from transformers import TFAutoModelForTokenClassification
>>> model = TFAutoModelForTokenClassification.from_pretrained(
... "distilbert/distilbert-base-uncased", num_labels=13, id2label=id2label, label2id=label2id
... )
```
Convert your datasets to the `tf.data.Dataset` format with [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_wnut["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_validation_set = model.prepare_tf_dataset(
... tokenized_wnut["validation"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
Configure the model for training with [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). Note that all Transformers models have a default task-relevant loss function, so you don't need to specify one unless you want to:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
The last two things to set up before you start training are computing the seqeval scores from the predictions, and providing a way to push your model to the Hub. Both are done with [Keras callbacks](../main_classes/keras_callbacks).
Pass your `compute_metrics` function to [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)
```
Specify where to push your model and tokenizer in [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_wnut_model",
... tokenizer=tokenizer,
... )
```
Then bundle your callbacks together:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
Finally, you're ready to start training your model! Call [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) with your training and validation datasets, the number of epochs, and your callbacks to fine-tune the model:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)
```
Once training completes, your model is automatically uploaded to the Hub so everyone can use it!
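A minimal [`pipeline`] sketch for trying the pushed checkpoint (assuming the `stevhliu/my_awesome_wnut_model` name used in the inference examples below):
```py
from transformers import pipeline

# example text from this guide's inference section
text = "The Golden State Warriors are an American professional basketball team based in San Francisco."

classifier = pipeline("ner", model="stevhliu/my_awesome_wnut_model")
classifier(text)
```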
</tf>
</frameworkcontent>
<Tip>
@ -457,8 +349,6 @@ pip install transformers datasets evaluate seqeval
You can also manually replicate the results of the `pipeline` if you'd like:
<frameworkcontent>
<pt>
Tokenize the text and return PyTorch tensors:
```py
@ -502,49 +392,3 @@ pip install transformers datasets evaluate seqeval
'O',
'O']
```
</pt>
<tf>
Tokenize the text and return TensorFlow tensors:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("stevhliu/my_awesome_wnut_model")
>>> inputs = tokenizer(text, return_tensors="tf")
```
Pass your inputs to the model and return the `logits`:
```py
>>> from transformers import TFAutoModelForTokenClassification
>>> model = TFAutoModelForTokenClassification.from_pretrained("stevhliu/my_awesome_wnut_model")
>>> logits = model(**inputs).logits
```
Get the class with the highest probability, and use the model's `id2label` mapping to convert it to a text label:
```py
>>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1)
>>> predicted_token_class = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()]
>>> predicted_token_class
['O',
'O',
'B-location',
'I-location',
'B-group',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'B-location',
'B-location',
'O',
'O']
```
</tf>
</frameworkcontent>


@ -113,24 +113,12 @@ pip install transformers datasets evaluate sacrebleu
Now create a batch of examples using [`DataCollatorForSeq2Seq`]. It's more efficient to *dynamically pad* the sentences to the longest length in a batch during collation, rather than padding the whole dataset to the maximum length.
<frameworkcontent>
<pt>
```py
>>> from transformers import DataCollatorForSeq2Seq
>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)
```
</pt>
<tf>
```py
>>> from transformers import DataCollatorForSeq2Seq
>>> data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint, return_tensors="tf")
```
</tf>
</frameworkcontent>
## Evaluate
@ -177,8 +165,6 @@ pip install transformers datasets evaluate sacrebleu
## Train
<frameworkcontent>
<pt>
<Tip>
@ -233,91 +219,6 @@ pip install transformers datasets evaluate sacrebleu
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
<Tip>
If you aren't familiar with fine-tuning a model with Keras, take a look at the basic tutorial [here](../training#train-a-tensorflow-model-with-keras)!
</Tip>
To fine-tune a model in TensorFlow, start by setting up an optimizer function, a learning rate schedule, and some training hyperparameters:
```py
>>> from transformers import AdamWeightDecay
>>> optimizer = AdamWeightDecay(learning_rate=2e-5, weight_decay_rate=0.01)
```
Then you can load T5 with [`TFAutoModelForSeq2SeqLM`]:
```py
>>> from transformers import TFAutoModelForSeq2SeqLM
>>> model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
```
Convert your datasets to the `tf.data.Dataset` format with [`~transformers.TFPreTrainedModel.prepare_tf_dataset`]:
```py
>>> tf_train_set = model.prepare_tf_dataset(
... tokenized_books["train"],
... shuffle=True,
... batch_size=16,
... collate_fn=data_collator,
... )
>>> tf_test_set = model.prepare_tf_dataset(
... tokenized_books["test"],
... shuffle=False,
... batch_size=16,
... collate_fn=data_collator,
... )
```
Configure the model for training with [`compile`](https://keras.io/api/models/model_training_apis/#compile-method). Note that all Transformers models have a default task-relevant loss function, so you don't need to specify one unless you want to:
```py
>>> import tensorflow as tf
>>> model.compile(optimizer=optimizer) # No loss argument!
```
The last two things to set up before you start training are computing the SacreBLEU metric from the predictions, and providing a way to push your model to the Hub. Both are done with [Keras callbacks](../main_classes/keras_callbacks).
Pass your `compute_metrics` function to [`~transformers.KerasMetricCallback`]:
```py
>>> from transformers.keras_callbacks import KerasMetricCallback
>>> metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_test_set)
```
Specify where to push your model and tokenizer in [`~transformers.PushToHubCallback`]:
```py
>>> from transformers.keras_callbacks import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="my_awesome_opus_books_model",
... tokenizer=tokenizer,
... )
```
Then bundle your callbacks together:
```py
>>> callbacks = [metric_callback, push_to_hub_callback]
```
Finally, you're ready to start training your model! Call [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) with your training and validation datasets, the number of epochs, and your callbacks to fine-tune the model:
```py
>>> model.fit(x=tf_train_set, validation_data=tf_test_set, epochs=3, callbacks=callbacks)
```
Once training completes, your model is automatically uploaded to the Hub so everyone can use it!
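A minimal [`pipeline`] sketch for trying the pushed checkpoint (assuming the `username/my_awesome_opus_books_model` name used in the inference examples below; the `translation_en_to_fr` task string is an assumption for this English-to-French model):
```py
from transformers import pipeline

# example text from this guide's inference section; T5 expects the task prefix
text = "translate English to French: Legumes share resources with nitrogen-fixing bacteria."

translator = pipeline("translation_en_to_fr", model="username/my_awesome_opus_books_model")
translator(text)
```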
</tf>
</frameworkcontent>
<Tip>
@ -351,8 +252,6 @@ pip install transformers datasets evaluate sacrebleu
You can also manually replicate the results of the `pipeline` if you'd like:
<frameworkcontent>
<pt>
Tokenize the text and return the `input_ids` as PyTorch tensors:
```py
@ -377,31 +276,3 @@ pip install transformers datasets evaluate sacrebleu
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
'Les lignées partagent des ressources avec des bactéries enfixant l'azote.'
```
</pt>
<tf>
Tokenize the text and return the `input_ids` as TensorFlow tensors:
```py
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("username/my_awesome_opus_books_model")
>>> inputs = tokenizer(text, return_tensors="tf").input_ids
```
Use the [`~transformers.generation_tf_utils.TFGenerationMixin.generate`] method to create the translation. For more details about the different text generation strategies and parameters for controlling generation, check out the [Text generation](../main_classes/text_generation) API.
```py
>>> from transformers import TFAutoModelForSeq2SeqLM
>>> model = TFAutoModelForSeq2SeqLM.from_pretrained("username/my_awesome_opus_books_model")
>>> outputs = model.generate(inputs, max_new_tokens=40, do_sample=True, top_k=30, top_p=0.95)
```
Decode the generated token ids back into text:
```py
>>> tokenizer.decode(outputs[0], skip_special_tokens=True)
'Les lugumes partagent les ressources avec des bactéries fixatrices d'azote.'
```
</tf>
</frameworkcontent>


@ -1,40 +0,0 @@
# Export to TFLite
[TensorFlow Lite](https://www.tensorflow.org/lite/guide) is a lightweight framework for deploying machine learning models on resource-constrained devices, such as mobile phones, embedded systems, and Internet of Things (IoT) devices. TFLite is designed to optimize and run models efficiently on these devices with limited compute power, memory, and power consumption.
A TensorFlow Lite model is represented in a special efficient portable format identified by the `.tflite` file extension.
🤗 Optimum offers functionality to export 🤗 Transformers models to TFLite through its `exporters.tflite` module. For the list of supported model architectures, please refer to the [🤗 Optimum documentation](https://huggingface.co/docs/optimum/exporters/tflite/overview).
To export a model to TFLite, install the required dependencies:
```bash
pip install optimum[exporters-tf]
```
To see all available arguments, refer to the [🤗 Optimum documentation](https://huggingface.co/docs/optimum/main/en/exporters/tflite/usage_guides/export_a_model), or view the help from the command line:
```bash
optimum-cli export tflite --help
```
To export a model checkpoint from the 🤗 Hub, for example `google-bert/bert-base-uncased`, run the following command:
```bash
optimum-cli export tflite --model google-bert/bert-base-uncased --sequence_length 128 bert_tflite/
```
You should see logs indicating the progress and showing where the resulting `model.tflite` is saved, like the following:
```bash
Validating TFLite model...
-[✓] TFLite model output names match reference model (logits)
- Validating TFLite Model output "logits":
-[✓] (1, 128, 30522) matches (1, 128, 30522)
-[x] values not close enough, max diff: 5.817413330078125e-05 (atol: 1e-05)
The TensorFlow Lite export succeeded with the warning: The maximum absolute difference between the output of the reference model and the TFLite exported model is not within the set tolerance 1e-05:
- logits: max diff = 5.817413330078125e-05.
The exported model was saved at: bert_tflite
```
The example above shows how to export a checkpoint from the 🤗 Hub. When exporting a local model, first make sure that you saved both the model's weights and the tokenizer files in the same directory (`local_path`). When using the CLI, pass the `local_path` to the `model` argument instead of the checkpoint name on the 🤗 Hub.


@ -611,7 +611,6 @@ accelerate launch \
--learning_rate 5e-5 \
--num_train_epochs 3 \
--output_dir /tmp/$TASK_NAME/ \
--overwrite_output_dir
```
You can also specify the parameters from the `config_file.yaml` directly on the command line:
@ -634,7 +633,6 @@ accelerate launch --num_processes=2 \
--learning_rate 5e-5 \
--num_train_epochs 3 \
--output_dir /tmp/$TASK_NAME/ \
--overwrite_output_dir
```
Check out the [Launching your Accelerate scripts](https://huggingface.co/docs/accelerate/basic_tutorials/launch) tutorial to learn more about `accelerate_launch` and custom configurations.


@ -58,8 +58,6 @@
in the right sidebar to jump to the framework you want - and if you want to hide all of the content for a given framework,
just use the button at the top-right corner of that framework's block!
<frameworkcontent>
<pt>
<Youtube id="nvBXf7s7vTI"/>
## Train with PyTorch Trainer
@ -139,124 +137,10 @@
```py
>>> trainer.train()
```
</pt>
<tf>
<a id='keras'></a>
<Youtube id="rnTGBy2ax1c"/>
## Train a TensorFlow model with Keras
You can also train 🤗 Transformers models in TensorFlow with the Keras API!
### Loading data for Keras
When you want to train a 🤗 Transformers model with the Keras API, you need to convert your dataset to a format that
Keras understands. If your dataset is small, you can simply convert the whole thing to NumPy arrays and pass it to Keras.
Let's try that first before we do anything more complicated.
First, load a dataset. We'll use the CoLA dataset from the [GLUE benchmark](https://huggingface.co/datasets/glue),
since it's a simple binary text classification task, and we'll just take the training split for now.
```py
from datasets import load_dataset
dataset = load_dataset("glue", "cola")
dataset = dataset["train"]  # just take the training split for now
```
Next, load a tokenizer and tokenize the data as NumPy arrays. Note that the labels are already a list of 0s and 1s,
so we can just convert that directly to a NumPy array without tokenization!
```py
from transformers import AutoTokenizer
import numpy as np
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenized_data = tokenizer(dataset["sentence"], return_tensors="np", padding=True)
# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
tokenized_data = dict(tokenized_data)
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
```
Finally, load, [`compile`](https://keras.io/api/models/model_training_apis/#compile-method), and [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) the model. Note that Transformers models all have a default task-relevant loss function, so you don't need to specify one unless you want to:
```py
from transformers import TFAutoModelForSequenceClassification
from tensorflow.keras.optimizers import Adam
# Load and compile our model
model = TFAutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-cased")
# Lower learning rates are often better for fine-tuning transformers
model.compile(optimizer=Adam(3e-5))  # No loss argument!
model.fit(tokenized_data, labels)
```
<Tip>
You don't have to pass a loss argument to your models when you `compile()` them! Hugging Face models automatically
choose a loss that is appropriate for their task and model architecture if this argument is left blank. You can always
override this by specifying a loss yourself if you want to!
</Tip>
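For instance, a minimal sketch of overriding the default with an explicit Keras loss (standard Keras API, shown here only as an illustration):
```py
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

# override the built-in task loss with an explicit one if you prefer
model.compile(
    optimizer=Adam(3e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
```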
This approach works great for smaller datasets, but for larger datasets it can become a problem. Why?
Because the tokenized array and labels would have to be fully loaded into memory, and because NumPy doesn't handle
"jagged" arrays, every tokenized sample would have to be padded to the length of the longest sample in the whole dataset. That's going to make your array even bigger, and all those padding tokens will slow down training too!
### Loading data as a tf.data.Dataset
If you want to avoid slowing down training, you can load your data as a `tf.data.Dataset` instead. Although you can write your own `tf.data` pipeline if you want, we have two convenience methods for doing this:
- [`~TFPreTrainedModel.prepare_tf_dataset`]: This is the method we recommend in most cases. Because it is a method
on your model, it can inspect the model to automatically figure out which columns are usable as model inputs,
and discard the others to make a simpler, more performant dataset.
- [`~datasets.Dataset.to_tf_dataset`]: This method is more low-level, and is useful when you want to control exactly how
your dataset is created, by specifying the exact `columns` and `label_cols` to include (a short sketch appears further below).
Before you can use [`~TFPreTrainedModel.prepare_tf_dataset`], you will need to add the tokenizer outputs to your dataset as columns, as shown in
the following code sample:
```py
def tokenize_dataset(data):
    # Keys of the returned dictionary will be added to the dataset as columns
    return tokenizer(data["text"])
dataset = dataset.map(tokenize_dataset)
```
Remember that Hugging Face datasets are stored on disk by default, so this will not inflate your memory usage! Once the columns have been added, you can stream batches from the dataset and add padding to each batch, which greatly reduces the number of padding tokens compared to padding the entire dataset.
```py
>>> tf_dataset = model.prepare_tf_dataset(dataset, batch_size=16, shuffle=True, tokenizer=tokenizer)
```
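If you prefer the lower-level [`~datasets.Dataset.to_tf_dataset`] route instead, a minimal sketch might look like the following (the column names are assumed to match the tokenizer outputs added above, and the collator choice is illustrative):
```py
from transformers import DataCollatorWithPadding

# pick the input and label columns yourself and pad each batch with a collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
tf_dataset = dataset.to_tf_dataset(
    columns=["input_ids", "token_type_ids", "attention_mask"],
    label_cols=["label"],
    batch_size=16,
    shuffle=True,
    collate_fn=data_collator,
)
```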
Note that in the code sample above, you need to pass the tokenizer to `prepare_tf_dataset` so it can correctly pad batches as they're loaded.
If all the samples in your dataset are the same length and no padding is necessary, you can skip this argument.
If you need to do something more complex than just padding samples (e.g. corrupting tokens for masked language modelling),
you can use the `collate_fn` argument instead to pass a function that will be called to transform the
list of samples into a batch and apply any preprocessing you want; a minimal sketch follows below. See our [examples](https://github.com/huggingface/transformers/tree/main/examples) or
[notebooks](https://huggingface.co/docs/transformers/notebooks) to see this approach in action.
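As an illustration of the `collate_fn` route, here is a sketch using [`DataCollatorForLanguageModeling`] purely as an example collator (for actual masked language modelling you would pair it with an MLM model such as `TFAutoModelForMaskedLM`, not the classification model above):
```py
from transformers import DataCollatorForLanguageModeling

# example: a collator that randomly masks tokens for masked language modelling
mlm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15, return_tensors="np")
tf_mlm_dataset = model.prepare_tf_dataset(
    dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=mlm_collator,
)
```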
Once you've created a `tf.data.Dataset`, you can compile and fit the model as before:
```py
model.compile(optimizer=Adam(3e-5)) # No loss argument!
model.fit(tf_dataset)
```
</tf>
</frameworkcontent>
<a id='pytorch_native'></a>
## Train in native PyTorch
<frameworkcontent>
<pt>
<Youtube id="Dh9CL8fyG80"/>
[`Trainer`] takes care of the training loop and allows you to fine-tune a model in a single line of code. For users who prefer to write their own training loop, you can also fine-tune a 🤗 Transformers model in native PyTorch.
@ -397,8 +281,6 @@ torch.cuda.empty_cache()
>>> metric.compute()
```
</pt>
</frameworkcontent>
<a id='additional-resources'></a>
@ -409,4 +291,4 @@ torch.cuda.empty_cache()
- [🤗 Transformers Examples](https://github.com/huggingface/transformers/tree/main/examples) includes scripts
to train common NLP tasks in PyTorch and TensorFlow.
- [🤗 Transformers Notebooks](notebooks) contains various notebooks on how to fine-tune a model for a specific task in PyTorch and TensorFlow.


@ -53,7 +53,7 @@ Lassen Sie uns daher ein wenig tiefer in das allgemeine Design der Bibliothek ei
### Overview of the models
To successfully add a model, it is important to understand the interaction between your model and its configuration,
[`PreTrainedModel`], and [`PretrainedConfig`]. As an example, we will
[`PreTrainedModel`], and [`PreTrainedConfig`]. As an example, we will
call the model to be added to 🤗 Transformers `BrandNewBert`.
Let's take a look:
@ -81,10 +81,10 @@ model.config # model has access to its config
```
Similar to the model, the configuration inherits basic serialization and deserialization functionality from
[`PretrainedConfig`]. Note that the configuration and the model are always serialized into two
[`PreTrainedConfig`]. Note that the configuration and the model are always serialized into two
different formats - the model into a *pytorch_model.bin* file and the configuration into a *config.json* file. Calling
[`~PreTrainedModel.save_pretrained`] will automatically call
[`~PretrainedConfig.save_pretrained`], so that both the model and the configuration are saved.
[`~PreTrainedConfig.save_pretrained`], so that both the model and the configuration are saved.
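As a rough sketch of that relationship (using an existing model class here, since `BrandNewBert` is only a placeholder name and the directory name is illustrative):
```py
from transformers import BertConfig, BertModel

config = BertConfig()                    # the configuration describes the architecture
model = BertModel(config)                # the model is built from, and keeps a reference to, its config
model.save_pretrained("brand_new_bert")  # writes the weights and, via the config's save_pretrained, config.json
reloaded = BertModel.from_pretrained("brand_new_bert")
```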
### Code style


@ -81,8 +81,6 @@ Laden Sie einen Prozessor mit [`AutoProcessor.from_pretrained`]:
## AutoModel
<frameworkcontent>
<pt>
Finally, the `AutoModelFor` classes let you load a pretrained model for a given task (see [here](model_doc/auto) for a complete list of available tasks). For example, load a model for sequence classification with [`AutoModelForSequenceClassification.from_pretrained`]:
```py
@ -108,24 +106,3 @@ TensorFlow- und Flax-Checkpoints sind nicht betroffen und können in PyTorch-Arc
</Tip>
Generally, we recommend using the `AutoTokenizer` class and the `AutoModelFor` class to load pretrained instances of models. This ensures you load the correct architecture every time. In the next [tutorial](preprocessing), you'll learn how to use your newly loaded tokenizer, feature extractor, and processor to preprocess a dataset for fine-tuning.
</pt>
<tf>
Finally, the `TFAutoModelFor` classes let you load a pretrained model for a given task (see [here](model_doc/auto) for a complete list of available tasks). For example, load a model for sequence classification with [`TFAutoModelForSequenceClassification.from_pretrained`]:
```py
>>> from transformers import TFAutoModelForSequenceClassification
>>> model = TFAutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased")
```
You can easily reuse the same checkpoint to load an architecture for a different task:
```py
>>> from transformers import TFAutoModelForTokenClassification
>>> model = TFAutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased")
```
Generally, we recommend using the `AutoTokenizer` class and the `TFAutoModelFor` class to load pretrained instances of models. This ensures you load the correct architecture every time. In the next [tutorial](preprocessing), you'll learn how to use your newly loaded tokenizer, feature extractor, and processor to preprocess a dataset for fine-tuning.
</tf>
</frameworkcontent>


@ -78,10 +78,10 @@ Wenn Sie an der grundlegenden Verwendung von LLMs interessiert sind, ist unsere
First, you need to load the model.
```py
>>> from transformers import AutoModelForCausalLM
>>> from transformers import AutoModelForCausalLM, BitsAndBytesConfig
>>> model = AutoModelForCausalLM.from_pretrained(
... "openlm-research/open_llama_7b", device_map="auto", load_in_4bit=True
... "openlm-research/open_llama_7b", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True)
... )
```
@ -119,12 +119,12 @@ Und das war's! Mit ein paar Zeilen Code können Sie sich die Macht eines LLM zun
There are many [generation strategies](generation_strategies), and sometimes the default values may not be appropriate for your use case. If your outputs don't match what you expect, we've created a list of the most common pitfalls and how to avoid them.
```py
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
>>> tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_7b")
>>> tokenizer.pad_token = tokenizer.eos_token # Llama has no pad token by default
>>> model = AutoModelForCausalLM.from_pretrained(
... "openlm-research/open_llama_7b", device_map="auto", load_in_4bit=True
... "openlm-research/open_llama_7b", device_map="auto", quantization_config=BitsAndBytesConfig(load_in_4bit=True)
... )
```


@ -79,43 +79,15 @@ Um sicherzustellen, dass Ihr Modell von jemandem verwendet werden kann, der mit
Converting a checkpoint for another framework is easy. Make sure you have PyTorch and TensorFlow installed (see [here](installation) for installation instructions), and then find the specific model for your task in the other framework.
<frameworkcontent>
<pt>
Specify `from_tf=True` to convert a checkpoint from TensorFlow to PyTorch:
```py
>>> pt_model = DistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_tf=True)
>>> pt_model.save_pretrained("path/to/awesome-name-you-picked")
```
</pt>
<tf>
Specify `from_pt=True` to convert a checkpoint from PyTorch to TensorFlow:
```py
>>> tf_model = TFDistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_pt=True)
```
Then you can save your new TensorFlow model with its new checkpoint:
```py
>>> tf_model.save_pretrained("path/to/awesome-name-you-picked")
```
</tf>
<jax>
If a model is available in Flax, you can also convert a checkpoint from PyTorch to Flax:
```py
>>> flax_model = FlaxDistilBertForSequenceClassification.from_pretrained(
... "path/to/awesome-name-you-picked", from_pt=True
... )
```
</jax>
</frameworkcontent>
## Upload a model during training
<frameworkcontent>
<pt>
<Youtube id="Z1-XMy-GNLQ"/>
Sharing a model to the Hub is as simple as adding an extra parameter or callback. Remember from the [fine-tuning tutorial](training) that the [`TrainingArguments`] class is where you specify hyperparameters and additional training options. One of these training options includes the ability to push a model directly to the Hub. Set `push_to_hub=True` in your [`TrainingArguments`]:
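The snippet that follows in the original guide is collapsed in this diff; a minimal sketch of that option (the output directory name is illustrative) is:
```py
from transformers import TrainingArguments

# push_to_hub=True uploads the model to the Hub during/after training
training_args = TrainingArguments(output_dir="my-awesome-model", push_to_hub=True)
```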
@ -141,29 +113,6 @@ Nach der Feinabstimmung Ihres Modells rufen Sie [`~transformers.Trainer.push_to_
```py
>>> trainer.push_to_hub()
```
</pt>
<tf>
Share a model to the Hub with [`PushToHubCallback`]. In the [`PushToHubCallback`] function, add:
- An output directory for your model.
- A tokenizer.
- The `hub_model_id`, which is your Hub username and model name.
```py
>>> from transformers import PushToHubCallback
>>> push_to_hub_callback = PushToHubCallback(
... output_dir="./your_model_save_path", tokenizer=tokenizer, hub_model_id="your-username/my-awesome-model"
... )
```
Add the callback to [`fit`](https://keras.io/api/models/model_training_apis/), and 🤗 Transformers will push the trained model to the Hub:
```py
>>> model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=3, callbacks=push_to_hub_callback)
```
</tf>
</frameworkcontent>
## Use the `push_to_hub` function
@ -229,4 +178,4 @@ Um sicherzustellen, dass die Benutzer die Fähigkeiten, Grenzen, möglichen Verz
* Manually creating and uploading a `README.md` file.
* Clicking on the **Edit model card** button in your model repository.
Take a look at the DistilBert [model card](https://huggingface.co/distilbert/distilbert-base-uncased) for a good example of the type of information a model card should include. For more details about other options you can configure in the `README.md` file, such as a model's carbon footprint or widget examples, see the documentation [here](https://huggingface.co/docs/hub/models-cards).


@ -153,8 +153,6 @@ Schließlich möchten Sie, dass der Tokenizer die tatsächlichen Tensoren zurüc
Set the `return_tensors` parameter to either `pt` for PyTorch or `tf` for TensorFlow:
<frameworkcontent>
<pt>
```py
>>> batch_sentences = [
@ -174,32 +172,6 @@ Setzen Sie den Parameter `return_tensors` entweder auf `pt` für PyTorch, oder `
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}
```
</pt>
<tf>
```py
>>> batch_sentences = [
... "But what about second breakfast?",
... "Don't think he knows about second breakfast, Pip.",
... "What about elevensies?",
... ]
>>> encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors="tf")
>>> print(encoded_input)
{'input_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[101, 1252, 1184, 1164, 1248, 6462, 136, 102, 0, 0, 0, 0, 0, 0, 0],
[101, 1790, 112, 189, 1341, 1119, 3520, 1164, 1248, 6462, 117, 21902, 1643, 119, 102],
[101, 1327, 1164, 5450, 23434, 136, 102, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype=int32)>,
'token_type_ids': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>,
'attention_mask': <tf.Tensor: shape=(2, 9), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>}
```
</tf>
</frameworkcontent>
## Audio
