Merge branch 'main' into tensor-cache

fixed dynamic cache
rebased
2025-10-22 10:19:00 +08:00 · 2025-01-24 12:02:45 +01:00 · 2025-01-23 16:45:28 +01:00 · 2025-01-22 17:31:39 +01:00 · 2025-01-22 17:30:23 +01:00 · 2025-01-22 17:29:48 +01:00
4608 changed files with 374666 additions and 487417 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -7,18 +7,6 @@ parameters:
    nightly:
        type: boolean
        default: false
-    GHA_Actor:
-        type: string
-        default: ""
-    GHA_Action:
-        type: string
-        default: ""
-    GHA_Event:
-        type: string
-        default: ""
-    GHA_Meta:
-        type: string
-        default: ""

 jobs:
    # Ensure running with CircleCI/huggingface
@ -70,7 +58,7 @@ jobs:
            - run:
                name: "Prepare pipeline parameters"
                command: |
-                    python utils/process_test_artifacts.py
+                    python utils/process_test_artifacts.py 

            # To avoid an overly long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
            # Otherwise the list of tests was just too big. Being explicit is good, but in this case it was a limitation.
@ -112,6 +100,8 @@ jobs:

            - run:
                name: "Retrieve Artifact Paths"
+                env:
+                    CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }}
                command: |
                    project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}"
                    job_number=${CIRCLE_BUILD_NUM}
@ -120,7 +110,7 @@ jobs:
            - run:
                name: "Prepare pipeline parameters"
                command: |
-                    python utils/process_test_artifacts.py
+                    python utils/process_test_artifacts.py 

            # To avoid an overly long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
            # Otherwise the list of tests was just too big. Being explicit is good, but in this case it was a limitation.
@ -156,7 +146,7 @@ jobs:
                  path: ~/transformers/installed.txt
            - run: python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
            - run: ruff check examples tests src utils
-            - run: ruff format examples tests src utils --check
+            - run: ruff format tests src utils --check
            - run: python utils/custom_init_isort.py --check_only
            - run: python utils/sort_auto_mappings.py --check_only
            - run: python utils/check_doc_toc.py
@ -181,16 +171,17 @@ jobs:
                  path: ~/transformers/installed.txt
            - run: python utils/check_copies.py
            - run: python utils/check_modular_conversion.py
+            - run: python utils/check_table.py
            - run: python utils/check_dummies.py
            - run: python utils/check_repo.py
            - run: python utils/check_inits.py
-            - run: python utils/check_pipeline_typing.py
            - run: python utils/check_config_docstrings.py
            - run: python utils/check_config_attributes.py
            - run: python utils/check_doctest_list.py
            - run: make deps_table_check_updated
            - run: python utils/update_metadata.py --check-only
            - run: python utils/check_docstrings.py
+            - run: python utils/check_support_list.py

 workflows:
    version: 2
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@ -28,32 +28,13 @@ COMMON_ENV_VARIABLES = {
    "TRANSFORMERS_IS_CI": True,
    "PYTEST_TIMEOUT": 120,
    "RUN_PIPELINE_TESTS": False,
-    # will be adjust in `CircleCIJob.to_dict`.
-    "RUN_FLAKY": True,
+    "RUN_PT_TF_CROSS_TESTS": False,
+    "RUN_PT_FLAX_CROSS_TESTS": False,
 }
 # Disable the use of {"s": None} as the output is way too long, making navigation on CircleCI impractical
-COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None}
+COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]

-# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
-# to rerun the tests that match these patterns.
-FLAKY_TEST_FAILURE_PATTERNS = [
-    "OSError",  # Machine/connection transient error
-    "Timeout",  # Machine/connection transient error
-    "ConnectionError",  # Connection transient error
-    "FileNotFoundError",  # Raised by `datasets` on Hub failures
-    "PIL.UnidentifiedImageError",  # Raised by `PIL.Image.open` on connection issues
-    "HTTPError",  # Also catches HfHubHTTPError
-    "AssertionError: Tensor-likes are not close!",  # `torch.testing.assert_close`, we might have unlucky random values
-    # TODO: error downloading tokenizer's `merged.txt` from hub can cause all the exceptions below. Throw and handle
-    # them under a single message.
-    "TypeError: expected str, bytes or os.PathLike object, not NoneType",
-    "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
-    "Converting from Tiktoken failed",
-    "KeyError: <class ",
-    "TypeError: not a string",
-]
-

 class EmptyJob:
    job_name = "empty"
@ -109,9 +90,7 @@ class CircleCIJob:
                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
            print(f"Using {self.docker_image} docker image")
        if self.install_steps is None:
-            self.install_steps = ["uv pip install ."]
-        # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
-        self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
+            self.install_steps = ["uv venv && uv pip install ."]
        if self.pytest_options is None:
            self.pytest_options = {}
        if isinstance(self.tests_to_run, str):
@ -130,8 +109,6 @@ class CircleCIJob:

    def to_dict(self):
        env = COMMON_ENV_VARIABLES.copy()
-        # Do not run tests decorated by @is_flaky on pull requests
-        env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == ""
        env.update(self.additional_env)

        job = {
@ -149,9 +126,7 @@ class CircleCIJob:
                # Examples special case: we need to download NLTK files in advance to avoid concurrency issues
        timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
        marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
-        junit_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
-        joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
-        repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
+        additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
        parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
        steps = [
            "checkout",
@ -177,10 +152,9 @@ class CircleCIJob:
                    "command": f"TESTS=$(circleci tests split  --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
                    }
            },
-            {"run": {"name": "fetch hub objects before pytest", "command": "python3 utils/fetch_hub_objects_for_ci.py"}},
            {"run": {
                "name": "Run tests",
-                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
+                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
            },
            {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
            {"run": {"name": "Failed tests: show reasons",   "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
@ -203,6 +177,23 @@ class CircleCIJob:


 # JOBS
+torch_and_tf_job = CircleCIJob(
+    "torch_and_tf",
+    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
+    additional_env={"RUN_PT_TF_CROSS_TESTS": True},
+    marker="is_pt_tf_cross_test",
+    pytest_options={"rA": None, "durations": 0},
+)
+
+
+torch_and_flax_job = CircleCIJob(
+    "torch_and_flax",
+    additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
+    docker_image=[{"image":"huggingface/transformers-torch-jax-light"}],
+    marker="is_pt_flax_cross_test",
+    pytest_options={"rA": None, "durations": 0},
+)
+
 torch_job = CircleCIJob(
    "torch",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
@ -213,9 +204,6 @@ torch_job = CircleCIJob(
 generate_job = CircleCIJob(
    "generate",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    # networkx==3.3 (after #36957) cause some issues
-    # TODO: remove this once it works directly
-    install_steps=["uv pip install ."],
    marker="generate",
    parallelism=6,
 )
@ -232,6 +220,22 @@ processor_job = CircleCIJob(
    parallelism=8,
 )

+tf_job = CircleCIJob(
+    "tf",
+    docker_image=[{"image":"huggingface/transformers-tf-light"}],
+    parallelism=6,
+)
+
+
+flax_job = CircleCIJob(
+    "flax",
+    docker_image=[{"image":"huggingface/transformers-jax-light"}],
+    parallelism=6,
+    pytest_num_workers=16,
+    resource_class="2xlarge",
+)
+
+
 pipelines_torch_job = CircleCIJob(
    "pipelines_torch",
    additional_env={"RUN_PIPELINE_TESTS": True},
@ -240,6 +244,16 @@ pipelines_torch_job = CircleCIJob(
    parallelism=4,
 )

+
+pipelines_tf_job = CircleCIJob(
+    "pipelines_tf",
+    additional_env={"RUN_PIPELINE_TESTS": True},
+    docker_image=[{"image":"huggingface/transformers-tf-light"}],
+    marker="is_pipeline_test",
+    parallelism=4,
+)
+
+
 custom_tokenizers_job = CircleCIJob(
    "custom_tokenizers",
    additional_env={"RUN_CUSTOM_TOKENIZERS": True},
@ -252,16 +266,23 @@ examples_torch_job = CircleCIJob(
    additional_env={"OMP_NUM_THREADS": 8},
    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
    # TODO @ArthurZucker remove this once docker is easier to build
-    install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
-    pytest_num_workers=4,
+    install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
 )

+
+examples_tensorflow_job = CircleCIJob(
+    "examples_tensorflow",
+    additional_env={"OMP_NUM_THREADS": 8},
+    docker_image=[{"image":"huggingface/transformers-examples-tf"}],
+)
+
+
 hub_job = CircleCIJob(
    "hub",
    additional_env={"HUGGINGFACE_CO_STAGING": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    install_steps=[
-        'uv pip install .',
+        'uv venv && uv pip install .',
        'git config --global user.email "ci@dummy.com"',
        'git config --global user.name "ci"',
    ],
@ -275,7 +296,8 @@ onnx_job = CircleCIJob(
    "onnx",
    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
    install_steps=[
-        "uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
+        "uv venv",
+        "uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
    ],
    pytest_options={"k onnx": None},
    pytest_num_workers=1,
@ -302,9 +324,6 @@ repo_utils_job = CircleCIJob(
 non_model_job = CircleCIJob(
    "non_model",
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
-    # networkx==3.3 (after #36957) cause some issues
-    # TODO: remove this once it works directly
-    install_steps=["uv pip install .[serving]"],
    marker="not generate",
    parallelism=6,
 )
@ -322,7 +341,7 @@ doc_test_job = CircleCIJob(
    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
    install_steps=[
        # Add an empty file to keep the test step running correctly even if no file is selected to be tested.
-        "uv pip install .",
+        "uv venv && pip install .",
        "touch dummy.py",
        command,
        "cat pr_documentation_tests_temp.txt",
@ -334,9 +353,9 @@ doc_test_job = CircleCIJob(
    pytest_num_workers=1,
 )

-REGULAR_TESTS = [torch_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
-EXAMPLES_TESTS = [examples_torch_job]
-PIPELINE_TESTS = [pipelines_torch_job]
+REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip
+EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
+PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
 REPO_UTIL_TESTS = [repo_utils_job]
 DOC_TESTS = [doc_test_job]
 ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip
@ -363,12 +382,7 @@ def create_circleci_config(folder=None):
        "parameters": {
            # Only used to accept the parameters from the trigger
            "nightly": {"type": "boolean", "default": False},
-            # Only used to accept the parameters from GitHub Actions trigger
-            "GHA_Actor": {"type": "string", "default": ""},
-            "GHA_Action": {"type": "string", "default": ""},
-            "GHA_Event": {"type": "string", "default": ""},
-            "GHA_Meta": {"type": "string", "default": ""},
-            "tests_to_run": {"type": "string", "default": ""},
+            "tests_to_run": {"type": "string", "default": ''},
            **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
            **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
        },
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -16,7 +16,7 @@ body:
    id: system-info
    attributes:
      label: System Info
-      description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
+      description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below.
      placeholder: transformers version, platform, python version, ...
    validations:
      required: true
@ -38,30 +38,24 @@ body:

          - text models: @ArthurZucker
          - vision models: @amyeroberts, @qubvel
-          - speech models: @eustlb
+          - speech models: @ylacombe, @eustlb
          - graph models: @clefourrier

        Library:

-          - flax: @gante and @Rocketknight1
+          - flax: @sanchit-gandhi
          - generate: @zucchini-nlp (visual-language models) or @gante (all others)
          - pipelines: @Rocketknight1
          - tensorflow: @gante and @Rocketknight1
          - tokenizers: @ArthurZucker and @itazap
-          - trainer: @zach-huggingface @SunMarc
+          - trainer: @muellerzr @SunMarc

        Integrations:

-          - deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
+          - deepspeed: HF Trainer/Accelerate: @muellerzr
          - ray/raytune: @richardliaw, @amogkam
          - Big Model Inference: @SunMarc
          - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
-        
-        Devices/Backends:
-        
-          - AMD ROCm: @ivarflakstad
-          - Intel XPU: @IlyasMoutawwakil
-          - Ascend NPU: @ivarflakstad 

        Documentation: @stevhliu

@ -78,7 +72,7 @@ body:

        Maintained examples (not research project or legacy):

-          - Flax: @Rocketknight1
+          - Flax: @sanchit-gandhi
          - PyTorch: See Models above and tag the person corresponding to the modality of the example.
          - TensorFlow: @Rocketknight1

@ -112,7 +106,6 @@ body:
      label: Reproduction
      description: |
        Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
-        Please include relevant config information with your code, for example your Trainers, TRL, Peft, and DeepSpeed configs.
        If you have code snippets, error messages, stack traces please provide them here as well.
        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
        Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
--- a/.github/ISSUE_TEMPLATE/i18n.md
+++ b/.github/ISSUE_TEMPLATE/i18n.md
@ -23,7 +23,7 @@ Some notes:
 * Please translate in a gender-neutral way.
 * Add your translations to the folder called `<languageCode>` inside the [source folder](https://github.com/huggingface/transformers/tree/main/docs/source).
 * Register your translation in `<languageCode>/_toctree.yml`; please follow the order of the [English version](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml).
-* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu for review.
+* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @stevhliu and @MKhalusova for review.
 * 🙋 If you'd like others to help you with the translation, you can also post in the 🤗 [forums](https://discuss.huggingface.co/).

 ## Get Started section
--- a/.github/ISSUE_TEMPLATE/migration.yml
+++ b/.github/ISSUE_TEMPLATE/migration.yml
@ -6,7 +6,7 @@ body:
    id: system-info
    attributes:
      label: System Info
-      description: Please share your system info with us. You can run the command `transformers env` and copy-paste its output below.
+      description: Please share your system info with us. You can run the command `transformers-cli env` and copy-paste its output below.
      render: shell
      placeholder: transformers version, platform, python version, ...
    validations:
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -41,22 +41,22 @@ Models:

 - text models: @ArthurZucker
 - vision models: @amyeroberts, @qubvel
- speech models: @eustlb
+- speech models: @ylacombe, @eustlb
 - graph models: @clefourrier

 Library:

- flax: @gante and @Rocketknight1
+- flax: @sanchit-gandhi
 - generate: @zucchini-nlp (visual-language models) or @gante (all others)
 - pipelines: @Rocketknight1
 - tensorflow: @gante and @Rocketknight1
 - tokenizers: @ArthurZucker
- trainer: @zach-huggingface, @SunMarc and @qgallouedec
+- trainer: @muellerzr and @SunMarc
 - chat templates: @Rocketknight1

 Integrations:

- deepspeed: HF Trainer/Accelerate: @SunMarc @zach-huggingface
+- deepspeed: HF Trainer/Accelerate: @muellerzr
 - ray/raytune: @richardliaw, @amogkam
 - Big Model Inference: @SunMarc
 - quantization (bitsandbytes, autogpt): @SunMarc @MekkCyber
@ -72,7 +72,7 @@ HF projects:

 Maintained examples (not research project or legacy):

- Flax: @Rocketknight1
+- Flax: @sanchit-gandhi
 - PyTorch: See Models above and tag the person corresponding to the modality of the example.
 - TensorFlow: @Rocketknight1

--- a/.github/scripts/assign_reviewers.py
+++ b/.github/scripts/assign_reviewers.py
@ -1,120 +0,0 @@
-# coding=utf-8
-# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import github
-import json
-from github import Github
-import re
-from collections import Counter
-from pathlib import Path
-
-def pattern_to_regex(pattern):
-    if pattern.startswith("/"):
-        start_anchor = True
-        pattern = re.escape(pattern[1:])
-    else:
-        start_anchor = False
-        pattern = re.escape(pattern)
-    # Replace `*` with "any number of non-slash characters"
-    pattern = pattern.replace(r"\*", "[^/]*")
-    if start_anchor:
-        pattern = r"^\/?" + pattern  # Allow an optional leading slash after the start of the string
-    return pattern
-
-def get_file_owners(file_path, codeowners_lines):
-    # Process lines in reverse (last matching pattern takes precedence)
-    for line in reversed(codeowners_lines):
-        # Skip comments and empty lines, strip inline comments
-        line = line.split('#')[0].strip()
-        if not line:
-            continue
-
-        # Split into pattern and owners
-        parts = line.split()
-        pattern = parts[0]
-        # Can be empty, e.g. for dummy files with explicitly no owner!
-        owners = [owner.removeprefix("@") for owner in parts[1:]]
-
-        # Check if file matches pattern
-        file_regex = pattern_to_regex(pattern)
-        if re.search(file_regex, file_path) is not None:
-            return owners  # Remember, can still be empty!
-    return []  # Should never happen, but just in case
-
-def pr_author_is_in_hf(pr_author, codeowners_lines):
-    # Check if the PR author is in the codeowners file
-    for line in codeowners_lines:
-        line = line.split('#')[0].strip()
-        if not line:
-            continue
-
-        # Split into pattern and owners
-        parts = line.split()
-        owners = [owner.removeprefix("@") for owner in parts[1:]]
-
-        if pr_author in owners:
-            return True
-    return False
-
-def main():
-    script_dir = Path(__file__).parent.absolute()
-    with open(script_dir / "codeowners_for_review_action") as f:
-        codeowners_lines = f.readlines()
-
-    g = Github(os.environ['GITHUB_TOKEN'])
-    repo = g.get_repo("huggingface/transformers")
-    with open(os.environ['GITHUB_EVENT_PATH']) as f:
-        event = json.load(f)
-
-    # The PR number is available in the event payload
-    pr_number = event['pull_request']['number']
-    pr = repo.get_pull(pr_number)
-    pr_author = pr.user.login
-    if pr_author_is_in_hf(pr_author, codeowners_lines):
-        print(f"PR author {pr_author} is in codeowners, skipping review request.")
-        return
-
-    existing_reviews = list(pr.get_reviews())
-    if existing_reviews:
-        print(f"Already has reviews: {[r.user.login for r in existing_reviews]}")
-        return
-
-    users_requested, teams_requested = pr.get_review_requests()
-    users_requested = list(users_requested)
-    if users_requested:
-        print(f"Reviewers already requested: {users_requested}")
-        return
-
-    locs_per_owner = Counter()
-    for file in pr.get_files():
-        owners = get_file_owners(file.filename, codeowners_lines)
-        for owner in owners:
-            locs_per_owner[owner] += file.changes
-
-    # Assign the top 2 based on locs changed as reviewers, but skip the owner if present
-    locs_per_owner.pop(pr_author, None)
-    top_owners = locs_per_owner.most_common(2)
-    print("Top owners", top_owners)
-    top_owners = [owner[0] for owner in top_owners]
-    try:
-        pr.create_review_request(top_owners)
-    except github.GithubException as e:
-        print(f"Failed to request review for {top_owners}: {e}")
-
-
-
-if __name__ == "__main__":
-    main()
--- a/.github/scripts/codeowners_for_review_action
+++ b/.github/scripts/codeowners_for_review_action
@ -1,370 +0,0 @@
-# Top-level rules are matched only if nothing else matches
-* @Rocketknight1 @ArthurZucker # if no one is pinged based on the other rules, he will do the dispatch
-*.md @stevhliu
-*tokenization* @ArthurZucker
-docs/ @stevhliu
-/benchmark/ @McPatate
-/docker/ @ydshieh @ArthurZucker
-
-# More high-level globs catch cases when specific rules later don't apply
-/src/transformers/models/*/processing* @molbap @yonigozlan @qubvel
-/src/transformers/models/*/image_processing* @qubvel
-/src/transformers/models/*/image_processing_*_fast* @yonigozlan
-
-# Owners of subsections of the library
-/src/transformers/generation/ @gante
-/src/transformers/pipeline/ @Rocketknight1 @yonigozlan
-/src/transformers/integrations/ @SunMarc @MekkCyber @zach-huggingface
-/src/transformers/quantizers/ @SunMarc @MekkCyber
-tests/ @ydshieh
-tests/generation/ @gante
-
-/src/transformers/models/auto/ @ArthurZucker
-/src/transformers/utils/ @ArthurZucker @Rocketknight1
-/src/transformers/loss/ @ArthurZucker
-/src/transformers/onnx/ @michaelbenayoun
-
-# Specific files come after the sections/globs, so they take priority
-/.circleci/config.yml @ArthurZucker @ydshieh
-/utils/tests_fetcher.py @ydshieh
-trainer.py @zach-huggingface @SunMarc
-trainer_utils.py @zach-huggingface @SunMarc
-/utils/modular_model_converter.py @Cyrilvallez @ArthurZucker
-
-# Owners of individual models are specific / high priority, and so they come last
-# mod* captures modeling and modular files
-
-# Text models
-/src/transformers/models/albert/mod*_albert* @ArthurZucker
-/src/transformers/models/bamba/mod*_bamba* @ArthurZucker
-/src/transformers/models/bart/mod*_bart* @ArthurZucker
-/src/transformers/models/barthez/mod*_barthez* @ArthurZucker
-/src/transformers/models/bartpho/mod*_bartpho* @ArthurZucker
-/src/transformers/models/bert/mod*_bert* @ArthurZucker
-/src/transformers/models/bert_generation/mod*_bert_generation* @ArthurZucker
-/src/transformers/models/bert_japanese/mod*_bert_japanese* @ArthurZucker
-/src/transformers/models/bertweet/mod*_bertweet* @ArthurZucker
-/src/transformers/models/big_bird/mod*_big_bird* @ArthurZucker
-/src/transformers/models/bigbird_pegasus/mod*_bigbird_pegasus* @ArthurZucker
-/src/transformers/models/biogpt/mod*_biogpt* @ArthurZucker
-/src/transformers/models/blenderbot/mod*_blenderbot* @ArthurZucker
-/src/transformers/models/blenderbot_small/mod*_blenderbot_small* @ArthurZucker
-/src/transformers/models/bloom/mod*_bloom* @ArthurZucker
-/src/transformers/models/bort/mod*_bort* @ArthurZucker
-/src/transformers/models/byt5/mod*_byt5* @ArthurZucker
-/src/transformers/models/camembert/mod*_camembert* @ArthurZucker
-/src/transformers/models/canine/mod*_canine* @ArthurZucker
-/src/transformers/models/codegen/mod*_codegen* @ArthurZucker
-/src/transformers/models/code_llama/mod*_code_llama* @ArthurZucker
-/src/transformers/models/cohere/mod*_cohere* @ArthurZucker
-/src/transformers/models/cohere2/mod*_cohere2* @ArthurZucker
-/src/transformers/models/convbert/mod*_convbert* @ArthurZucker
-/src/transformers/models/cpm/mod*_cpm* @ArthurZucker
-/src/transformers/models/cpmant/mod*_cpmant* @ArthurZucker
-/src/transformers/models/ctrl/mod*_ctrl* @ArthurZucker
-/src/transformers/models/dbrx/mod*_dbrx* @ArthurZucker
-/src/transformers/models/deberta/mod*_deberta* @ArthurZucker
-/src/transformers/models/deberta_v2/mod*_deberta_v2* @ArthurZucker
-/src/transformers/models/dialogpt/mod*_dialogpt* @ArthurZucker
-/src/transformers/models/diffllama/mod*_diffllama* @ArthurZucker
-/src/transformers/models/distilbert/mod*_distilbert* @ArthurZucker
-/src/transformers/models/dpr/mod*_dpr* @ArthurZucker
-/src/transformers/models/electra/mod*_electra* @ArthurZucker
-/src/transformers/models/encoder_decoder/mod*_encoder_decoder* @ArthurZucker
-/src/transformers/models/ernie/mod*_ernie* @ArthurZucker
-/src/transformers/models/ernie_m/mod*_ernie_m* @ArthurZucker
-/src/transformers/models/esm/mod*_esm* @ArthurZucker
-/src/transformers/models/falcon/mod*_falcon* @ArthurZucker
-/src/transformers/models/falcon3/mod*_falcon3* @ArthurZucker
-/src/transformers/models/falcon_mamba/mod*_falcon_mamba* @ArthurZucker
-/src/transformers/models/fastspeech2_conformer/mod*_fastspeech2_conformer* @ArthurZucker
-/src/transformers/models/flan_t5/mod*_flan_t5* @ArthurZucker
-/src/transformers/models/flan_ul2/mod*_flan_ul2* @ArthurZucker
-/src/transformers/models/flaubert/mod*_flaubert* @ArthurZucker
-/src/transformers/models/fnet/mod*_fnet* @ArthurZucker
-/src/transformers/models/fsmt/mod*_fsmt* @ArthurZucker
-/src/transformers/models/funnel/mod*_funnel* @ArthurZucker
-/src/transformers/models/fuyu/mod*_fuyu* @ArthurZucker
-/src/transformers/models/gemma/mod*_gemma* @ArthurZucker
-/src/transformers/models/gemma2/mod*_gemma2* @ArthurZucker
-/src/transformers/models/glm/mod*_glm* @ArthurZucker
-/src/transformers/models/openai_gpt/mod*_openai_gpt* @ArthurZucker
-/src/transformers/models/gpt_neo/mod*_gpt_neo* @ArthurZucker
-/src/transformers/models/gpt_neox/mod*_gpt_neox* @ArthurZucker
-/src/transformers/models/gpt_neox_japanese/mod*_gpt_neox_japanese* @ArthurZucker
-/src/transformers/models/gptj/mod*_gptj* @ArthurZucker
-/src/transformers/models/gpt2/mod*_gpt2* @ArthurZucker
-/src/transformers/models/gpt_bigcode/mod*_gpt_bigcode* @ArthurZucker
-/src/transformers/models/gptsan_japanese/mod*_gptsan_japanese* @ArthurZucker
-/src/transformers/models/gpt_sw3/mod*_gpt_sw3* @ArthurZucker
-/src/transformers/models/granite/mod*_granite* @ArthurZucker
-/src/transformers/models/granitemoe/mod*_granitemoe* @ArthurZucker
-/src/transformers/models/herbert/mod*_herbert* @ArthurZucker
-/src/transformers/models/ibert/mod*_ibert* @ArthurZucker
-/src/transformers/models/jamba/mod*_jamba* @ArthurZucker
-/src/transformers/models/jetmoe/mod*_jetmoe* @ArthurZucker
-/src/transformers/models/jukebox/mod*_jukebox* @ArthurZucker
-/src/transformers/models/led/mod*_led* @ArthurZucker
-/src/transformers/models/llama/mod*_llama* @ArthurZucker @Cyrilvallez
-/src/transformers/models/longformer/mod*_longformer* @ArthurZucker
-/src/transformers/models/longt5/mod*_longt5* @ArthurZucker
-/src/transformers/models/luke/mod*_luke* @ArthurZucker
-/src/transformers/models/m2m_100/mod*_m2m_100* @ArthurZucker
-/src/transformers/models/madlad_400/mod*_madlad_400* @ArthurZucker
-/src/transformers/models/mamba/mod*_mamba* @ArthurZucker
-/src/transformers/models/mamba2/mod*_mamba2* @ArthurZucker
-/src/transformers/models/marian/mod*_marian* @ArthurZucker
-/src/transformers/models/markuplm/mod*_markuplm* @ArthurZucker
-/src/transformers/models/mbart/mod*_mbart* @ArthurZucker
-/src/transformers/models/mega/mod*_mega* @ArthurZucker
-/src/transformers/models/megatron_bert/mod*_megatron_bert* @ArthurZucker
-/src/transformers/models/megatron_gpt2/mod*_megatron_gpt2* @ArthurZucker
-/src/transformers/models/mistral/mod*_mistral* @ArthurZucker
-/src/transformers/models/mixtral/mod*_mixtral* @ArthurZucker
-/src/transformers/models/mluke/mod*_mluke* @ArthurZucker
-/src/transformers/models/mobilebert/mod*_mobilebert* @ArthurZucker
-/src/transformers/models/modernbert/mod*_modernbert* @ArthurZucker
-/src/transformers/models/mpnet/mod*_mpnet* @ArthurZucker
-/src/transformers/models/mpt/mod*_mpt* @ArthurZucker
-/src/transformers/models/mra/mod*_mra* @ArthurZucker
-/src/transformers/models/mt5/mod*_mt5* @ArthurZucker
-/src/transformers/models/mvp/mod*_mvp* @ArthurZucker
-/src/transformers/models/myt5/mod*_myt5* @ArthurZucker
-/src/transformers/models/nemotron/mod*_nemotron* @ArthurZucker
-/src/transformers/models/nezha/mod*_nezha* @ArthurZucker
-/src/transformers/models/nllb/mod*_nllb* @ArthurZucker
-/src/transformers/models/nllb_moe/mod*_nllb_moe* @ArthurZucker
-/src/transformers/models/nystromformer/mod*_nystromformer* @ArthurZucker
-/src/transformers/models/olmo/mod*_olmo* @ArthurZucker
-/src/transformers/models/olmo2/mod*_olmo2* @ArthurZucker
-/src/transformers/models/olmoe/mod*_olmoe* @ArthurZucker
-/src/transformers/models/open_llama/mod*_open_llama* @ArthurZucker
-/src/transformers/models/opt/mod*_opt* @ArthurZucker
-/src/transformers/models/pegasus/mod*_pegasus* @ArthurZucker
-/src/transformers/models/pegasus_x/mod*_pegasus_x* @ArthurZucker
-/src/transformers/models/persimmon/mod*_persimmon* @ArthurZucker
-/src/transformers/models/phi/mod*_phi* @ArthurZucker
-/src/transformers/models/phi3/mod*_phi3* @ArthurZucker
-/src/transformers/models/phimoe/mod*_phimoe* @ArthurZucker
-/src/transformers/models/phobert/mod*_phobert* @ArthurZucker
-/src/transformers/models/plbart/mod*_plbart* @ArthurZucker
-/src/transformers/models/prophetnet/mod*_prophetnet* @ArthurZucker
-/src/transformers/models/qdqbert/mod*_qdqbert* @ArthurZucker
-/src/transformers/models/qwen2/mod*_qwen2* @ArthurZucker
-/src/transformers/models/qwen2_moe/mod*_qwen2_moe* @ArthurZucker
-/src/transformers/models/rag/mod*_rag* @ArthurZucker
-/src/transformers/models/realm/mod*_realm* @ArthurZucker
-/src/transformers/models/recurrent_gemma/mod*_recurrent_gemma* @ArthurZucker
-/src/transformers/models/reformer/mod*_reformer* @ArthurZucker
-/src/transformers/models/rembert/mod*_rembert* @ArthurZucker
-/src/transformers/models/retribert/mod*_retribert* @ArthurZucker
-/src/transformers/models/roberta/mod*_roberta* @ArthurZucker
-/src/transformers/models/roberta_prelayernorm/mod*_roberta_prelayernorm* @ArthurZucker
-/src/transformers/models/roc_bert/mod*_roc_bert* @ArthurZucker
-/src/transformers/models/roformer/mod*_roformer* @ArthurZucker
-/src/transformers/models/rwkv/mod*_rwkv* @ArthurZucker
-/src/transformers/models/splinter/mod*_splinter* @ArthurZucker
-/src/transformers/models/squeezebert/mod*_squeezebert* @ArthurZucker
-/src/transformers/models/stablelm/mod*_stablelm* @ArthurZucker
-/src/transformers/models/starcoder2/mod*_starcoder2* @ArthurZucker
-/src/transformers/models/switch_transformers/mod*_switch_transformers* @ArthurZucker
-/src/transformers/models/t5/mod*_t5* @ArthurZucker
-/src/transformers/models/t5v1.1/mod*_t5v1.1* @ArthurZucker
-/src/transformers/models/tapex/mod*_tapex* @ArthurZucker
-/src/transformers/models/transfo_xl/mod*_transfo_xl* @ArthurZucker
-/src/transformers/models/ul2/mod*_ul2* @ArthurZucker
-/src/transformers/models/umt5/mod*_umt5* @ArthurZucker
-/src/transformers/models/xmod/mod*_xmod* @ArthurZucker
-/src/transformers/models/xglm/mod*_xglm* @ArthurZucker
-/src/transformers/models/xlm/mod*_xlm* @ArthurZucker
-/src/transformers/models/xlm_prophetnet/mod*_xlm_prophetnet* @ArthurZucker
-/src/transformers/models/xlm_roberta/mod*_xlm_roberta* @ArthurZucker
-/src/transformers/models/xlm_roberta_xl/mod*_xlm_roberta_xl* @ArthurZucker
-/src/transformers/models/xlm_v/mod*_xlm_v* @ArthurZucker
-/src/transformers/models/xlnet/mod*_xlnet* @ArthurZucker
-/src/transformers/models/yoso/mod*_yoso* @ArthurZucker
-/src/transformers/models/zamba/mod*_zamba* @ArthurZucker
-
-# Vision models
-/src/transformers/models/beit/mod*_beit* @amyeroberts @qubvel
-/src/transformers/models/bit/mod*_bit* @amyeroberts @qubvel
-/src/transformers/models/conditional_detr/mod*_conditional_detr* @amyeroberts @qubvel
-/src/transformers/models/convnext/mod*_convnext* @amyeroberts @qubvel
-/src/transformers/models/convnextv2/mod*_convnextv2* @amyeroberts @qubvel
-/src/transformers/models/cvt/mod*_cvt* @amyeroberts @qubvel
-/src/transformers/models/deformable_detr/mod*_deformable_detr* @amyeroberts @qubvel
-/src/transformers/models/deit/mod*_deit* @amyeroberts @qubvel
-/src/transformers/models/depth_anything/mod*_depth_anything* @amyeroberts @qubvel
-/src/transformers/models/depth_anything_v2/mod*_depth_anything_v2* @amyeroberts @qubvel
-/src/transformers/models/deta/mod*_deta* @amyeroberts @qubvel
-/src/transformers/models/detr/mod*_detr* @amyeroberts @qubvel
-/src/transformers/models/dinat/mod*_dinat* @amyeroberts @qubvel
-/src/transformers/models/dinov2/mod*_dinov2* @amyeroberts @qubvel
-/src/transformers/models/dinov2_with_registers/mod*_dinov2_with_registers* @amyeroberts @qubvel
-/src/transformers/models/dit/mod*_dit* @amyeroberts @qubvel
-/src/transformers/models/dpt/mod*_dpt* @amyeroberts @qubvel
-/src/transformers/models/efficientformer/mod*_efficientformer* @amyeroberts @qubvel
-/src/transformers/models/efficientnet/mod*_efficientnet* @amyeroberts @qubvel
-/src/transformers/models/focalnet/mod*_focalnet* @amyeroberts @qubvel
-/src/transformers/models/glpn/mod*_glpn* @amyeroberts @qubvel
-/src/transformers/models/hiera/mod*_hiera* @amyeroberts @qubvel
-/src/transformers/models/ijepa/mod*_ijepa* @amyeroberts @qubvel
-/src/transformers/models/imagegpt/mod*_imagegpt* @amyeroberts @qubvel
-/src/transformers/models/levit/mod*_levit* @amyeroberts @qubvel
-/src/transformers/models/mask2former/mod*_mask2former* @amyeroberts @qubvel
-/src/transformers/models/maskformer/mod*_maskformer* @amyeroberts @qubvel
-/src/transformers/models/mobilenet_v1/mod*_mobilenet_v1* @amyeroberts @qubvel
-/src/transformers/models/mobilenet_v2/mod*_mobilenet_v2* @amyeroberts @qubvel
-/src/transformers/models/mobilevit/mod*_mobilevit* @amyeroberts @qubvel
-/src/transformers/models/mobilevitv2/mod*_mobilevitv2* @amyeroberts @qubvel
-/src/transformers/models/nat/mod*_nat* @amyeroberts @qubvel
-/src/transformers/models/poolformer/mod*_poolformer* @amyeroberts @qubvel
-/src/transformers/models/pvt/mod*_pvt* @amyeroberts @qubvel
-/src/transformers/models/pvt_v2/mod*_pvt_v2* @amyeroberts @qubvel
-/src/transformers/models/regnet/mod*_regnet* @amyeroberts @qubvel
-/src/transformers/models/resnet/mod*_resnet* @amyeroberts @qubvel
-/src/transformers/models/rt_detr/mod*_rt_detr* @amyeroberts @qubvel
-/src/transformers/models/segformer/mod*_segformer* @amyeroberts @qubvel
-/src/transformers/models/seggpt/mod*_seggpt* @amyeroberts @qubvel
-/src/transformers/models/superpoint/mod*_superpoint* @amyeroberts @qubvel
-/src/transformers/models/swiftformer/mod*_swiftformer* @amyeroberts @qubvel
-/src/transformers/models/swin/mod*_swin* @amyeroberts @qubvel
-/src/transformers/models/swinv2/mod*_swinv2* @amyeroberts @qubvel
-/src/transformers/models/swin2sr/mod*_swin2sr* @amyeroberts @qubvel
-/src/transformers/models/table_transformer/mod*_table_transformer* @amyeroberts @qubvel
-/src/transformers/models/textnet/mod*_textnet* @amyeroberts @qubvel
-/src/transformers/models/timm_wrapper/mod*_timm_wrapper* @amyeroberts @qubvel
-/src/transformers/models/upernet/mod*_upernet* @amyeroberts @qubvel
-/src/transformers/models/van/mod*_van* @amyeroberts @qubvel
-/src/transformers/models/vit/mod*_vit* @amyeroberts @qubvel
-/src/transformers/models/vit_hybrid/mod*_vit_hybrid* @amyeroberts @qubvel
-/src/transformers/models/vitdet/mod*_vitdet* @amyeroberts @qubvel
-/src/transformers/models/vit_mae/mod*_vit_mae* @amyeroberts @qubvel
-/src/transformers/models/vitmatte/mod*_vitmatte* @amyeroberts @qubvel
-/src/transformers/models/vit_msn/mod*_vit_msn* @amyeroberts @qubvel
-/src/transformers/models/vitpose/mod*_vitpose* @amyeroberts @qubvel
-/src/transformers/models/yolos/mod*_yolos* @amyeroberts @qubvel
-/src/transformers/models/zoedepth/mod*_zoedepth* @amyeroberts @qubvel
-
-# Audio models
-/src/transformers/models/audio_spectrogram_transformer/mod*_audio_spectrogram_transformer* @eustlb
-/src/transformers/models/bark/mod*_bark* @eustlb
-/src/transformers/models/clap/mod*_clap* @eustlb
-/src/transformers/models/dac/mod*_dac* @eustlb
-/src/transformers/models/encodec/mod*_encodec* @eustlb
-/src/transformers/models/hubert/mod*_hubert* @eustlb
-/src/transformers/models/mctct/mod*_mctct* @eustlb
-/src/transformers/models/mimi/mod*_mimi* @eustlb
-/src/transformers/models/mms/mod*_mms* @eustlb
-/src/transformers/models/moshi/mod*_moshi* @eustlb
-/src/transformers/models/musicgen/mod*_musicgen* @eustlb
-/src/transformers/models/musicgen_melody/mod*_musicgen_melody* @eustlb
-/src/transformers/models/pop2piano/mod*_pop2piano* @eustlb
-/src/transformers/models/seamless_m4t/mod*_seamless_m4t* @eustlb
-/src/transformers/models/seamless_m4t_v2/mod*_seamless_m4t_v2* @eustlb
-/src/transformers/models/sew/mod*_sew* @eustlb
-/src/transformers/models/sew_d/mod*_sew_d* @eustlb
-/src/transformers/models/speech_to_text/mod*_speech_to_text* @eustlb
-/src/transformers/models/speech_to_text_2/mod*_speech_to_text_2* @eustlb
-/src/transformers/models/speecht5/mod*_speecht5* @eustlb
-/src/transformers/models/unispeech/mod*_unispeech* @eustlb
-/src/transformers/models/unispeech_sat/mod*_unispeech_sat* @eustlb
-/src/transformers/models/univnet/mod*_univnet* @eustlb
-/src/transformers/models/vits/mod*_vits* @eustlb
-/src/transformers/models/wav2vec2/mod*_wav2vec2* @eustlb
-/src/transformers/models/wav2vec2_bert/mod*_wav2vec2_bert* @eustlb
-/src/transformers/models/wav2vec2_conformer/mod*_wav2vec2_conformer* @eustlb
-/src/transformers/models/wav2vec2_phoneme/mod*_wav2vec2_phoneme* @eustlb
-/src/transformers/models/wavlm/mod*_wavlm* @eustlb
-/src/transformers/models/whisper/mod*_whisper* @eustlb
-/src/transformers/models/xls_r/mod*_xls_r* @eustlb
-/src/transformers/models/xlsr_wav2vec2/mod*_xlsr_wav2vec2* @eustlb
-
-# Video models
-/src/transformers/models/timesformer/mod*_timesformer* @Rocketknight1
-/src/transformers/models/videomae/mod*_videomae* @Rocketknight1
-/src/transformers/models/vivit/mod*_vivit* @Rocketknight1
-
-# Multimodal models
-/src/transformers/models/align/mod*_align* @zucchini-nlp
-/src/transformers/models/altclip/mod*_altclip* @zucchini-nlp
-/src/transformers/models/aria/mod*_aria* @zucchini-nlp
-/src/transformers/models/blip/mod*_blip* @zucchini-nlp
-/src/transformers/models/blip_2/mod*_blip_2* @zucchini-nlp
-/src/transformers/models/bridgetower/mod*_bridgetower* @zucchini-nlp
-/src/transformers/models/bros/mod*_bros* @zucchini-nlp
-/src/transformers/models/chameleon/mod*_chameleon* @zucchini-nlp
-/src/transformers/models/chinese_clip/mod*_chinese_clip* @zucchini-nlp
-/src/transformers/models/clip/mod*_clip* @zucchini-nlp
-/src/transformers/models/clipseg/mod*_clipseg* @zucchini-nlp
-/src/transformers/models/clvp/mod*_clvp* @zucchini-nlp
-/src/transformers/models/colpali/mod*_colpali* @zucchini-nlp @yonigozlan
-/src/transformers/models/data2vec/mod*_data2vec* @zucchini-nlp
-/src/transformers/models/deplot/mod*_deplot* @zucchini-nlp
-/src/transformers/models/donut/mod*_donut* @zucchini-nlp
-/src/transformers/models/flava/mod*_flava* @zucchini-nlp
-/src/transformers/models/git/mod*_git* @zucchini-nlp
-/src/transformers/models/grounding_dino/mod*_grounding_dino* @qubvel
-/src/transformers/models/groupvit/mod*_groupvit* @zucchini-nlp
-/src/transformers/models/idefics/mod*_idefics* @zucchini-nlp
-/src/transformers/models/idefics2/mod*_idefics2* @zucchini-nlp
-/src/transformers/models/idefics3/mod*_idefics3* @zucchini-nlp
-/src/transformers/models/instructblip/mod*_instructblip* @zucchini-nlp
-/src/transformers/models/instructblipvideo/mod*_instructblipvideo* @zucchini-nlp
-/src/transformers/models/kosmos_2/mod*_kosmos_2* @zucchini-nlp
-/src/transformers/models/layoutlm/mod*_layoutlm* @NielsRogge
-/src/transformers/models/layoutlmv2/mod*_layoutlmv2* @NielsRogge
-/src/transformers/models/layoutlmv3/mod*_layoutlmv3* @NielsRogge
-/src/transformers/models/layoutxlm/mod*_layoutxlm* @NielsRogge
-/src/transformers/models/lilt/mod*_lilt* @zucchini-nlp
-/src/transformers/models/llava/mod*_llava* @zucchini-nlp @arthurzucker
-/src/transformers/models/llava_next/mod*_llava_next* @zucchini-nlp
-/src/transformers/models/llava_next_video/mod*_llava_next_video* @zucchini-nlp
-/src/transformers/models/llava_onevision/mod*_llava_onevision* @zucchini-nlp
-/src/transformers/models/lxmert/mod*_lxmert* @zucchini-nlp
-/src/transformers/models/matcha/mod*_matcha* @zucchini-nlp
-/src/transformers/models/mgp_str/mod*_mgp_str* @zucchini-nlp
-/src/transformers/models/mllama/mod*_mllama* @zucchini-nlp
-/src/transformers/models/nougat/mod*_nougat* @NielsRogge
-/src/transformers/models/omdet_turbo/mod*_omdet_turbo* @qubvel @yonigozlan
-/src/transformers/models/oneformer/mod*_oneformer* @zucchini-nlp
-/src/transformers/models/owlvit/mod*_owlvit* @qubvel
-/src/transformers/models/owlv2/mod*_owlv2* @qubvel
-/src/transformers/models/paligemma/mod*_paligemma* @zucchini-nlp @molbap
-/src/transformers/models/perceiver/mod*_perceiver* @zucchini-nlp
-/src/transformers/models/pix2struct/mod*_pix2struct* @zucchini-nlp
-/src/transformers/models/pixtral/mod*_pixtral* @zucchini-nlp @ArthurZucker
-/src/transformers/models/qwen2_audio/mod*_qwen2_audio* @zucchini-nlp @ArthurZucker
-/src/transformers/models/qwen2_vl/mod*_qwen2_vl* @zucchini-nlp @ArthurZucker
-/src/transformers/models/sam/mod*_sam* @zucchini-nlp @ArthurZucker
-/src/transformers/models/siglip/mod*_siglip* @zucchini-nlp
-/src/transformers/models/speech_encoder_decoder/mod*_speech_encoder_decoder* @zucchini-nlp
-/src/transformers/models/tapas/mod*_tapas* @NielsRogge
-/src/transformers/models/trocr/mod*_trocr* @zucchini-nlp
-/src/transformers/models/tvlt/mod*_tvlt* @zucchini-nlp
-/src/transformers/models/tvp/mod*_tvp* @zucchini-nlp
-/src/transformers/models/udop/mod*_udop* @zucchini-nlp
-/src/transformers/models/video_llava/mod*_video_llava* @zucchini-nlp
-/src/transformers/models/vilt/mod*_vilt* @zucchini-nlp
-/src/transformers/models/vipllava/mod*_vipllava* @zucchini-nlp
-/src/transformers/models/vision_encoder_decoder/mod*_vision_encoder_decoder* @Rocketknight1
-/src/transformers/models/vision_text_dual_encoder/mod*_vision_text_dual_encoder* @Rocketknight1
-/src/transformers/models/visual_bert/mod*_visual_bert* @zucchini-nlp
-/src/transformers/models/xclip/mod*_xclip* @zucchini-nlp
-
-# Reinforcement learning models
-/src/transformers/models/decision_transformer/mod*_decision_transformer* @Rocketknight1
-/src/transformers/models/trajectory_transformer/mod*_trajectory_transformer* @Rocketknight1
-
-# Time series models
-/src/transformers/models/autoformer/mod*_autoformer* @Rocketknight1
-/src/transformers/models/informer/mod*_informer* @Rocketknight1
-/src/transformers/models/patchtsmixer/mod*_patchtsmixer* @Rocketknight1
-/src/transformers/models/patchtst/mod*_patchtst* @Rocketknight1
-/src/transformers/models/time_series_transformer/mod*_time_series_transformer* @Rocketknight1
-
-# Graph models
-/src/transformers/models/graphormer/mod*_graphormer* @clefourrier
-
-# Finally, files with no owners that shouldn't generate pings, usually automatically generated and checked in the CI
-utils/dummy*
--- a/.github/workflows/add-model-like.yml
+++ b/.github/workflows/add-model-like.yml
@ -54,7 +54,7 @@ jobs:
      - name: Create model files
        run: |
          . ~/venv/bin/activate
-          transformers add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
+          transformers-cli add-new-model-like --config_file tests/fixtures/add_distilbert_like_config.json --path_to_repo .
          make style
          make fix-copies

--- a/.github/workflows/assign-reviewers.yml
+++ b/.github/workflows/assign-reviewers.yml
@ -1,26 +0,0 @@
-name: Assign PR Reviewers
-on:
-  pull_request_target:
-    branches:
-      - main
-    types: [ready_for_review]
-
-jobs:
-  assign_reviewers:
-    permissions:
-       pull-requests: write
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.13'
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install PyGithub
-      - name: Run assignment script
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: python .github/scripts/assign_reviewers.py
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@ -18,8 +18,7 @@ jobs:
    name: Benchmark
    strategy:
      matrix:
-        # group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled)
-        group: [aws-g5-4xlarge-cache]
+        group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
    runs-on:
      group: ${{ matrix.group }}
    if: |
@ -48,7 +47,7 @@ jobs:

      - name: Run database init script
        run: |
-          psql -f benchmark/utils/init_db.sql
+          psql -f benchmark/init_db.sql
        env:
          PGDATABASE: metrics
          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
@ -64,7 +63,7 @@ jobs:
            commit_id=$GITHUB_SHA
          fi
          commit_msg=$(git show -s --format=%s | cut -c1-70)
-          python3 benchmark/benchmarks_entrypoint.py "huggingface/transformers" "$BRANCH_NAME" "$commit_id" "$commit_msg"
+          python3 benchmark/benchmarks_entrypoint.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
        env:
          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
          # Enable this to see debug logs
@ -73,4 +72,3 @@ jobs:
          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
          PGUSER: transformers_benchmarks
          PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
-          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
--- a/.github/workflows/build-ci-docker-images.yml
+++ b/.github/workflows/build-ci-docker-images.yml
@ -26,7 +26,7 @@ jobs:

    strategy:
      matrix:
-        file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "jax-light", "examples-torch",  "examples-tf"]
+        file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch",  "examples-tf"]
    continue-on-error: true

    steps:
@ -34,11 +34,11 @@ jobs:
        name: Set tag
        run: |
              if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
-                  echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV"
+                  echo "TAG=huggingface/transformers-${{ matrix.file }}:dev" >> "$GITHUB_ENV" 
                  echo "setting it to DEV!"
              else
                  echo "TAG=huggingface/transformers-${{ matrix.file }}" >> "$GITHUB_ENV"
-
+                  
              fi
      -
        name: Set up Docker Buildx
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@ -19,7 +19,7 @@ concurrency:

 jobs:
  latest-docker:
-    name: "Latest PyTorch [dev]"
+    name: "Latest PyTorch + TensorFlow [dev]"
    runs-on:
      group: aws-general-8-plus
    steps:
@ -63,14 +63,14 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build
+          title: 🤗 Results of the transformers-all-latest-gpu-push-ci docker build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

  latest-torch-deepspeed-docker:
    name: "Latest PyTorch + DeepSpeed"
    runs-on:
-      group: aws-g4dn-2xlarge-cache
+      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
@ -99,7 +99,7 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER}}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build
+          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu docker build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

@ -140,7 +140,7 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build
+          title: 🤗 Results of the transformers-pytorch-deepspeed-latest-gpu-push-ci docker build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

@ -176,7 +176,7 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-doc-builder docker build
+          title: 🤗 Results of the huggingface/transformers-doc-builder docker build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

@ -214,7 +214,7 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build
+          title: 🤗 Results of the huggingface/transformers-pytorch-gpudocker build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

@ -223,19 +223,19 @@ jobs:
    runs-on:
      group: aws-general-8-plus
    steps:
-      -
+      - 
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
-      -
+      - 
        name: Check out code
        uses: actions/checkout@v4
-      -
+      - 
        name: Login to DockerHub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
-      -
+      - 
        name: Build and push
        uses: docker/build-push-action@v5
        with:
@ -263,12 +263,14 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build
+          title: 🤗 Results of the huggingface/transformers-pytorch-amd-gpu-push-ci build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

-  latest-pytorch-deepspeed-amd:
-    name: "PyTorch + DeepSpeed (AMD) [dev]"
+  latest-tensorflow:
+    name: "Latest TensorFlow [dev]"
+    # Push CI doesn't need this image
+    if: inputs.image_postfix != '-push-ci'
    runs-on:
      group: aws-general-8-plus
    steps:
@ -285,6 +287,42 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      -
+        name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: ./docker/transformers-tensorflow-gpu
+          build-args: |
+            REF=main
+          push: true
+          tags: huggingface/transformers-tensorflow-gpu
+
+      - name: Post to Slack
+        if: always()
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        with:
+          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
+          title: 🤗 Results of the huggingface/transformers-tensorflow-gpu build 
+          status: ${{ job.status }}
+          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+
+  latest-pytorch-deepspeed-amd:
+    name: "PyTorch + DeepSpeed (AMD) [dev]"
+    runs-on:
+      group: aws-general-8-plus
+    steps:
+      - 
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - 
+        name: Check out code
+        uses: actions/checkout@v4
+      - 
+        name: Login to DockerHub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - 
        name: Build and push
        uses: docker/build-push-action@v5
        with:
@ -312,7 +350,7 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build
+          title: 🤗 Results of the transformers-pytorch-deepspeed-amd-gpu build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

@ -350,6 +388,6 @@ jobs:
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ secrets.CI_SLACK_CHANNEL_DOCKER }}
-          title: 🤗 Results of the transformers-quantization-latest-gpu build
+          title: 🤗 Results of the transformers-quantization-latest-gpu build 
          status: ${{ job.status }}
          slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/build-nightly-ci-docker-images.yml
+++ b/.github/workflows/build-nightly-ci-docker-images.yml
@ -42,7 +42,7 @@ jobs:
  nightly-torch-deepspeed-docker:
    name: "Nightly PyTorch + DeepSpeed"
    runs-on:
-      group: aws-g4dn-2xlarge-cache
+      group: aws-general-8-plus
    steps:
      -
        name: Set up Docker Buildx
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@ -14,4 +14,5 @@ jobs:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
      package: transformers
-      languages: en
+      languages: ar de en es fr hi it ko pt tr zh ja te
+      custom_container: huggingface/transformers-doc-builder
--- a/.github/workflows/check_failed_model_tests.yml
+++ b/.github/workflows/check_failed_model_tests.yml
@ -9,18 +9,6 @@ on:
      start_sha:
        required: true
        type: string
-      job:
-        required: true
-        type: string
-      slack_report_channel:
-        required: true
-        type: string
-      ci_event:
-        required: true
-        type: string
-      report_repo_id:
-        required: true
-        type: string


 env:
@ -34,132 +22,82 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1


 jobs:
-  check_new_failures:
+  run_models_gpu:
    name: " "
    runs-on:
-      group: aws-g5-4xlarge-cache
+      group: aws-g4dn-2xlarge-cache
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - uses: actions/download-artifact@v4
        with:
-          name: ci_results_${{ inputs.job }}
-          path: /transformers/ci_results_${{ inputs.job }}
-
-      - name: Check file
-        working-directory: /transformers
-        run: |
-          if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
-            echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
-            echo "process=true" >> $GITHUB_ENV
-          else
-            echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
-            echo "process=false" >> $GITHUB_ENV
-          fi
-
-      - uses: actions/download-artifact@v4
-        if: ${{ env.process == 'true' }}
-        with:
-          pattern: setup_values*
-          path: setup_values
-          merge-multiple: true
-
-      - name: Prepare some setup values
-        if: ${{ env.process == 'true' }}
-        run: |
-          if [ -f setup_values/prev_workflow_run_id.txt ]; then
-            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
-          if [ -f setup_values/other_workflow_run_id.txt ]; then
-            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
+          name: ci_results_run_models_gpu
+          path: /transformers/ci_results_run_models_gpu

      - name: Update clone
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ github.sha }}

      - name: Get target commit
        working-directory: /transformers/utils
-        if: ${{ env.process == 'true' }}
        run: |
-          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"], workflow_run_id=os.environ["PREV_WORKFLOW_RUN_ID"]); print(commit)')" >> $GITHUB_ENV
+          echo "END_SHA=$(TOKEN=${{ secrets.ACCESS_REPO_INFO_TOKEN }} python3 -c 'import os; from get_previous_daily_ci import get_last_daily_ci_run_commit; commit=get_last_daily_ci_run_commit(token=os.environ["TOKEN"]); print(commit)')" >> $GITHUB_ENV

      - name: Checkout to `start_sha`
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: git fetch && git checkout ${{ inputs.start_sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      - name: NVIDIA-SMI
-        if: ${{ env.process == 'true' }}
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: pip freeze

      - name: Check failed tests
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
+        run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json

      - name: Show results
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: |
-          ls -l new_failures_with_bad_commit.json
-          cat new_failures_with_bad_commit.json
+          ls -l new_model_failures_with_bad_commit.json
+          cat new_model_failures_with_bad_commit.json

      - name: Checkout back
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        run: |
          git checkout ${{ inputs.start_sha }}

      - name: Process report
        shell: bash
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        env:
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-          JOB_NAME: ${{ inputs.job }}
-          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
        run: |
          python3 utils/process_bad_commit_report.py

      - name: Process report
        shell: bash
        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
        env:
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-          JOB_NAME: ${{ inputs.job }}
-          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
        run: |
          {
            echo 'REPORT_TEXT<<EOF'
@ -167,31 +105,17 @@ jobs:
            echo EOF
          } >> "$GITHUB_ENV"

-      - name: Prepare Slack report title
-        working-directory: /transformers
-        if: ${{ env.process == 'true' }}
-        run: |
-          pip install slack_sdk
-          echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
-
      - name: Send processed report
-        if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
+        if: ${{ !endsWith(env.REPORT_TEXT, '{}') }}
        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
        with:
          # Slack channel id, channel name, or user id to post message.
          # See also: https://api.slack.com/methods/chat.postMessage#channels
-          channel-id: '#${{ inputs.slack_report_channel }}'
+          channel-id: '#transformers-ci-feedback-tests'
          # For posting a rich message using Block Kit
          payload: |
            {
              "blocks": [
-                {
-                  "type": "header",
-                  "text": {
-                    "type": "plain_text",
-                    "text": "${{ env.title }}"
-                  }
-                },
                {
                  "type": "section",
                  "text": {
--- a/.github/workflows/collated-reports.yml
+++ b/.github/workflows/collated-reports.yml
@ -1,49 +0,0 @@
-name: CI collated reports
-
-on:
-  workflow_call:
-    inputs:
-      job:
-        required: true
-        type: string
-      report_repo_id:
-        required: true
-        type: string
-      machine_type:
-        required: true
-        type: string
-      gpu_name:
-        description: Name of the GPU used for the job. Its enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
-        required: true
-        type: string
-
-jobs:
-  collated_reports:
-    name: Collated reports
-    runs-on: ubuntu-22.04
-    if: always()
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v4
-
-      - name: Collated reports
-        shell: bash
-        env:
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_SHA: ${{ github.sha }}
-          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-        run: |
-          pip install huggingface_hub
-          python3 utils/collated_reports.py                  \
-            --path /transformers/reports/                    \
-            --machine-type ${{ inputs.machine_type }}        \
-            --commit-hash ${{ env.CI_SHA }}                  \
-            --job ${{ inputs.job }}                          \
-            --report-repo-id ${{ inputs.report_repo_id }}    \
-            --gpu-name ${{ inputs.gpu_name }}
-
-      - name: Upload collated reports
-        uses: actions/upload-artifact@v4
-        with:
-          name: collated_reports_${{ env.CI_SHA }}.json
-          path: collated_reports_${{ env.CI_SHA }}.json
--- a/.github/workflows/doctest_job.yml
+++ b/.github/workflows/doctest_job.yml
@ -28,10 +28,10 @@ jobs:
      matrix:
        split_keys: ${{ fromJson(inputs.split_keys) }}
    runs-on: 
-      group: aws-g5-4xlarge-cache
+      group: aws-g4dn-2xlarge-cache
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
--- a/.github/workflows/doctests.yml
+++ b/.github/workflows/doctests.yml
@ -15,10 +15,10 @@ jobs:
  setup:
    name: Setup
    runs-on: 
-      group: aws-g5-4xlarge-cache
+      group: aws-g4dn-2xlarge-cache
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      job_splits: ${{ steps.set-matrix.outputs.job_splits }}
      split_keys: ${{ steps.set-matrix.outputs.split_keys }}
--- a/.github/workflows/get-pr-info.yml
+++ b/.github/workflows/get-pr-info.yml
@ -1,157 +0,0 @@
-name: Get PR commit SHA
-on:
-  workflow_call:
-    inputs:
-      pr_number:
-        required: true
-        type: string
-    outputs:
-      PR_HEAD_REPO_FULL_NAME:
-        description: "The full name of the repository from which the pull request is created"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
-      PR_BASE_REPO_FULL_NAME:
-        description: "The full name of the repository to which the pull request is created"
-        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
-      PR_HEAD_REPO_OWNER:
-        description: "The owner of the repository from which the pull request is created"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
-      PR_BASE_REPO_OWNER:
-        description: "The owner of the repository to which the pull request is created"
-        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
-      PR_HEAD_REPO_NAME:
-        description: "The name of the repository from which the pull request is created"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
-      PR_BASE_REPO_NAME:
-        description: "The name of the repository to which the pull request is created"
-        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
-      PR_HEAD_REF:
-        description: "The branch name of the pull request in the head repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
-      PR_BASE_REF:
-        description: "The branch name in the base repository (to merge into)"
-        value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
-      PR_HEAD_SHA:
-        description: "The head sha of the pull request branch in the head repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
-      PR_BASE_SHA:
-        description: "The head sha of the target branch in the base repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
-      PR_MERGE_COMMIT_SHA:
-        description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
-      PR_HEAD_COMMIT_DATE:
-        description: "The date of the head sha of the pull request branch in the head repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
-      PR_MERGE_COMMIT_DATE:
-        description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
-      PR_HEAD_COMMIT_TIMESTAMP:
-        description: "The timestamp of the head sha of the pull request branch in the head repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
-      PR_MERGE_COMMIT_TIMESTAMP:
-        description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
-        value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
-      PR:
-        description: "The PR"
-        value: ${{ jobs.get-pr-info.outputs.PR }}
-      PR_FILES:
-        description: "The files touched in the PR"
-        value: ${{ jobs.get-pr-info.outputs.PR_FILES }}
-
-
-jobs:
-  get-pr-info:
-    runs-on: ubuntu-22.04
-    name: Get PR commit SHA better
-    outputs:
-      PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
-      PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
-      PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
-      PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
-      PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
-      PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
-      PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
-      PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
-      PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
-      PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
-      PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
-      PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
-      PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
-      PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
-      PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
-      PR: ${{ steps.pr_info.outputs.pr }}
-      PR_FILES: ${{ steps.pr_info.outputs.files }}
-    if: ${{ inputs.pr_number != '' }}
-    steps:
-      - name: Extract PR details
-        id: pr_info
-        uses: actions/github-script@v6
-        with:
-          script: |            
-            const { data: pr } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: ${{ inputs.pr_number }}
-            });
-
-            const { data: head_commit }  = await github.rest.repos.getCommit({
-              owner: pr.head.repo.owner.login,
-              repo: pr.head.repo.name,
-              ref: pr.head.ref
-            });
-
-            const { data: merge_commit }  = await github.rest.repos.getCommit({
-              owner: pr.base.repo.owner.login,
-              repo: pr.base.repo.name,
-              ref: pr.merge_commit_sha,
-            });
-
-            const { data: files } = await github.rest.pulls.listFiles({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: ${{ inputs.pr_number }}
-            });
-
-            core.setOutput('head_repo_full_name', pr.head.repo.full_name);
-            core.setOutput('base_repo_full_name', pr.base.repo.full_name);
-            core.setOutput('head_repo_owner', pr.head.repo.owner.login);
-            core.setOutput('base_repo_owner', pr.base.repo.owner.login);
-            core.setOutput('head_repo_name', pr.head.repo.name);
-            core.setOutput('base_repo_name', pr.base.repo.name);
-            core.setOutput('head_ref', pr.head.ref);
-            core.setOutput('base_ref', pr.base.ref);
-            core.setOutput('head_sha', pr.head.sha);
-            core.setOutput('base_sha', pr.base.sha);
-            core.setOutput('merge_commit_sha', pr.merge_commit_sha);
-            core.setOutput('pr', pr);
-
-            core.setOutput('head_commit_date', head_commit.commit.committer.date);
-            core.setOutput('merge_commit_date', merge_commit.commit.committer.date);
-            
-            core.setOutput('files', files);            
-            
-            console.log('PR head commit:', {
-              head_commit: head_commit,
-              commit: head_commit.commit,
-              date: head_commit.commit.committer.date
-            });
-
-            console.log('PR merge commit:', {
-              merge_commit: merge_commit,
-              commit: merge_commit.commit,
-              date: merge_commit.commit.committer.date
-            });
-
-      - name: Convert dates to timestamps
-        id: get_timestamps
-        run: |
-          head_commit_date=${{ steps.pr_info.outputs.head_commit_date }}
-          merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }}
-          echo $head_commit_date
-          echo $merge_commit_date
-          head_commit_timestamp=$(date -d "$head_commit_date" +%s)
-          merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
-          echo $head_commit_timestamp
-          echo $merge_commit_timestamp
-          echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
-          echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
--- a/.github/workflows/get-pr-number.yml
+++ b/.github/workflows/get-pr-number.yml
@ -1,36 +0,0 @@
-name: Get PR number
-on:
-  workflow_call:
-    outputs:
-      PR_NUMBER:
-        description: "The extracted PR number"
-        value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}
-
-jobs:
-  get-pr-number:
-    runs-on: ubuntu-22.04
-    name: Get PR number
-    outputs:
-      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
-    steps:
-      - name: Get PR number
-        shell: bash
-        run: |
-          if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
-            echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
-          elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
-            echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
-          elif [[ "${{ github.event.pull_request }}" != "" ]]; then
-            echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
-          else
-            echo "PR_NUMBER=" >> $GITHUB_ENV
-          fi
-
-      - name: Check PR number
-        shell: bash
-        run: |
-          echo "${{ env.PR_NUMBER }}"
-
-      - name: Set PR number
-        id: set_pr_number
-        run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@ -12,16 +12,12 @@ on:
      slice_id:
        required: true
        type: number
-      runner_map:
-        required: false
+      runner:
+        required: true
        type: string
      docker:
        required: true
        type: string
-      report_name_prefix:
-        required: false
-        default: run_models_gpu
-        type: string

 env:
  HF_HOME: /mnt/cache
@ -34,6 +30,7 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

 jobs:
@ -45,7 +42,7 @@ jobs:
      matrix:
        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
    runs-on:
-      group: ${{ fromJson(inputs.runner_map)[matrix.folders][inputs.machine_type] }}
+      group: '${{ inputs.machine_type }}'
    container:
      image: ${{ inputs.docker }}
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@ -107,9 +104,9 @@ jobs:
        run: |
          echo "${{ inputs.machine_type }}"

-          if [ "${{ inputs.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ inputs.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ inputs.machine_type }}
@ -120,106 +117,23 @@ jobs:

      - name: Run all tests on GPU
        working-directory: /transformers
-        if: ${{ always() }}
-        run: |
-          python3 -m pip uninstall -y natten
-          python3 -m pip uninstall -y ninja && python3 -m pip install ninja && python3 -m pip install flash-attn --no-build-isolation
-#          START=0 END=400 python3 run.py
-#
-#      - name: "Upload"
-#        if: ${{ always() }}
-#        uses: actions/upload-artifact@v4
-#        with:
-#          name: summary_short_1
-#          path: /transformers/summary_short.txt
-#
-#      - name: Run all tests on GPU
-#        working-directory: /transformers
-#        if: ${{ always() }}
-#        run: |
-#          START=400 END=800 python3 run.py
-#
-#      - name: "Upload"
-#        if: ${{ always() }}
-#        uses: actions/upload-artifact@v4
-#        with:
-#          name: summary_short_2
-#          path: /transformers/summary_short.txt
-#
-#      - name: Run all tests on GPU
-#        working-directory: /transformers
-#        if: ${{ always() }}
-#        run: |
-#          START=800 END=1200 python3 run.py
-#
-#      - name: "Upload"
-#        if: ${{ always() }}
-#        uses: actions/upload-artifact@v4
-#        with:
-#          name: summary_short_3
-#          path: /transformers/summary_short.txt
-#
-#      - name: Run all tests on GPU
-#        working-directory: /transformers
-#        if: ${{ always() }}
-#        run: |
-#          START=1200 END=1600 python3 run.py
-#
-#      - name: "Upload"
-#        if: ${{ always() }}
-#        uses: actions/upload-artifact@v4
-#        with:
-#          name: summary_short_4
-#          path: /transformers/summary_short.txt
+        run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}

-      - name: Run all tests on GPU
-        working-directory: /transformers
-        if: ${{ always() }}
-        run: |
-          START=1600 END=2000 python3 run.py
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

-      - name: "Upload"
+      - name: Run test
+        shell: bash
+        run: |
+          mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+
+      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
-          name: summary_short_5
-          path: /transformers/summary_short.txt
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        if: ${{ always() }}
-        run: |
-          START=2000 END=2400 python3 run.py
-
-      - name: "Upload"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: summary_short_6
-          path: /transformers/summary_short.txt
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        if: ${{ always() }}
-        run: |
-          START=2400 END=2800 python3 run.py
-
-      - name: "Upload"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: summary_short_7
-          path: /transformers/summary_short.txt
-
-      - name: Run all tests on GPU
-        working-directory: /transformers
-        if: ${{ always() }}
-        run: |
-          START=2800 END=3000 python3 run.py
-
-      - name: "Upload"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: summary_short_8
-          path: /transformers/summary_short.txt
+          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
--- a/.github/workflows/model_jobs_amd.yml
+++ b/.github/workflows/model_jobs_amd.yml
@ -0,0 +1,129 @@
+name: model jobs
+
+on:
+  workflow_call:
+    inputs:
+      folder_slices:
+        required: true
+        type: string
+      machine_type:
+        required: true
+        type: string
+      slice_id:
+        required: true
+        type: number
+      runner:
+        required: true
+        type: string
+      docker:
+        required: true
+        type: string
+
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: yes
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: yes
+  # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access.
+  # This token is created under the bot `hf-transformers-bot`.
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
+  CUDA_VISIBLE_DEVICES: 0,1
+
+jobs:
+  run_models_gpu:
+    name: " "
+    strategy:
+      max-parallel: 1  # For now, not to parallelize. Can change later if it works well.
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
+    runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}']
+    container:
+      image: ${{ inputs.docker }}
+      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Echo input and matrix info
+        shell: bash
+        run: |
+          echo "${{ inputs.folder_slices }}"
+          echo "${{ matrix.folders }}"
+          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
+
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # Export `matrix_folders` (e.g. `models/bert` -> `models_bert`) for artifact names, where `/` is not allowed.
+        # Expansions are quoted so unusual folder names or runner paths are not word-split or globbed by bash.
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders="${{ matrix.folders }}"
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"
+
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: Update / Install some packages (for Past CI)
+        if: ${{ contains(inputs.docker, '-past-') }}
+        working-directory: /transformers
+        run: |
+          python3 -m pip install -U datasets
+
+      - name: Update / Install some packages (for Past CI)
+        if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }}
+        working-directory: /transformers
+        run: |
+          python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
+
+      - name: ROCM-SMI
+        run: |
+          rocm-smi
+
+      - name: ROCM-INFO
+        run: |
+          rocminfo  | grep "Agent" -A 14
+
+      - name: Show ROCR environment
+        run: |
+          echo "ROCR: $ROCR_VISIBLE_DEVICES"
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}  -m "not not_device_test"
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
+
+      - name: Run test
+        shell: bash
+        run: |
+          mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
+          echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt
+          echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports"
+
+      - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports
--- a/.github/workflows/model_jobs_intel_gaudi.yml
+++ b/.github/workflows/model_jobs_intel_gaudi.yml
@ -1,121 +0,0 @@
-name: model jobs
-
-on:
-  workflow_call:
-    inputs:
-      folder_slices:
-        required: true
-        type: string
-      slice_id:
-        required: true
-        type: number
-      runner:
-        required: true
-        type: string
-      machine_type:
-        required: true
-        type: string
-      report_name_prefix:
-        required: false
-        default: run_models_gpu
-        type: string
-
-env:
-  RUN_SLOW: yes
-  PT_HPU_LAZY_MODE: 0
-  TRANSFORMERS_IS_CI: yes
-  PT_ENABLE_INT64_SUPPORT: 1
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  HF_HOME: /mnt/cache/.cache/huggingface
-
-jobs:
-  run_models_gpu:
-    name: " "
-    strategy:
-      max-parallel: 8
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }}
-    runs-on:
-      group: ${{ inputs.runner }}
-    container:
-      image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
-      options: --runtime=habana
-        -v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
-        --env OMPI_MCA_btl_vader_single_copy_mechanism=none
-        --env HABANA_VISIBLE_DEVICES
-        --env HABANA_VISIBLE_MODULES
-        --cap-add=sys_nice
-        --shm-size=64G
-    steps:
-      - name: Echo input and matrix info
-        shell: bash
-        run: |
-          echo "${{ inputs.folder_slices }}"
-          echo "${{ matrix.folders }}"
-          echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}"
-
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'models/'/'models_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install dependencies
-        run: |
-          pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn
-
-      - name: HL-SMI
-        run: |
-          hl-smi
-          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
-          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
-
-      - name: Environment
-        run: python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        shell: bash
-        run: |
-          if [ "${{ inputs.machine_type }}" = "1gaudi" ]; then
-            machine_type=single-gpu
-          elif [ "${{ inputs.machine_type }}" = "2gaudi" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ inputs.machine_type }}
-          fi
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run all tests on Gaudi
-        run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Run test
-        shell: bash
-        run: |
-          mkdir -p reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
-          echo "hello" > reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ env.matrix_folders }}_test_reports
-          path: reports/${{ env.machine_type }}_${{ inputs.report_name_prefix }}_${{ matrix.folders }}_test_reports
--- a/.github/workflows/new_model_pr_merged_notification.yml
+++ b/.github/workflows/new_model_pr_merged_notification.yml
@ -1,68 +0,0 @@
-# Used to notify core maintainers about new model PR being merged
-name: New model PR merged notification
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - 'src/transformers/models/*/modeling_*'
-
-jobs:
-  notify_new_model:
-    name: Notify new model
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Check new model
-        shell: bash
-        run: |
-          python -m pip install gitpython
-          python -c 'from utils.pr_slow_ci_models import get_new_model; new_model = get_new_model(diff_with_last_commit=True); print(new_model)' | tee output.txt
-          echo "NEW_MODEL=$(tail -n 1 output.txt)" >> $GITHUB_ENV
-          echo "COMMIT_SHA=$(git log -1 --format=%H)" >> $GITHUB_ENV
-
-      - name: print commit sha
-        if: ${{ env.NEW_MODEL != ''}}
-        shell: bash
-        run: |
-          echo "$COMMIT_SHA"
-
-      - name: print new model
-        if: ${{ env.NEW_MODEL != ''}}
-        shell: bash
-        run: |
-          echo "$NEW_MODEL"
-
-      - name: Notify
-        if: ${{ env.NEW_MODEL != ''}}
-        uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
-        with:
-          # Slack channel id, channel name, or user id to post message.
-          # See also: https://api.slack.com/methods/chat.postMessage#channels
-          channel-id: transformers-new-model-notification
-          # For posting a rich message using Block Kit
-          payload: |
-            {
-              "blocks": [
-                {
-                  "type": "header",
-                  "text": {
-                    "type": "plain_text",
-                    "text": "New model!",
-                    "emoji": true
-                  }
-                },
-                {
-                  "type": "section",
-                  "text": {
-                    "type": "mrkdwn",
-                    "text": "<https://github.com/huggingface/transformers/commit/${{ env.COMMIT_SHA }}|New model: ${{ env.NEW_MODEL }}> GH_ArthurZucker, GH_lysandrejik, GH_ydshieh\ncommit SHA: ${{ env.COMMIT_SHA }}"
-                  }
-                }
-              ]
-            }
-        env:
-          SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
--- a/.github/workflows/pr-style-bot.yml
+++ b/.github/workflows/pr-style-bot.yml
@ -1,18 +0,0 @@
-# To run this bot, comment "@bot /style" on a PR
-name: Style Bot
-
-on:
-  issue_comment:
-    types: [created]
-
-permissions:
-  pull-requests: write
-
-jobs:
-  style:
-    uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@main
-    with:
-      python_quality_dependencies: "[quality]"
-      style_command_type: "default"
-    secrets:
-      bot_token: ${{ secrets.HF_STYLE_BOT_ACTION }}
--- a/.github/workflows/pr_build_doc_with_comment.yml
+++ b/.github/workflows/pr_build_doc_with_comment.yml
@ -1,134 +0,0 @@
-name: PR - build doc via comment
-on:
-  issue_comment:
-    types:
-      - created
-    branches-ignore:
-      - main
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'build-doc') }}
-  cancel-in-progress: true
-permissions: {}
-
-
-jobs:
-  get-pr-number:
-    name: Get PR number
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu", "ebezzam"]'), github.actor) && (startsWith(github.event.comment.body, 'build-doc')) }}
-    uses: ./.github/workflows/get-pr-number.yml
-
-  get-pr-info:
-    name: Get PR commit SHA
-    needs: get-pr-number
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    uses: ./.github/workflows/get-pr-info.yml
-    with:
-      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
-
-  verity_pr_commit:
-    name: Verity PR commit corresponds to a specific event by comparing timestamps
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    runs-on: ubuntu-22.04
-    needs: get-pr-info
-    env:
-      COMMENT_DATE: ${{ github.event.comment.created_at }}
-      PR_MERGE_COMMIT_DATE: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
-      PR_MERGE_COMMIT_TIMESTAMP: ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
-    steps:
-      - run: |
-          COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s")
-          echo "COMMENT_DATE: $COMMENT_DATE"
-          echo "PR_MERGE_COMMIT_DATE: $PR_MERGE_COMMIT_DATE"
-          echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP"
-          echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP"
-          if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then
-            echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!";
-            exit -1;
-          fi
-
-  create_run:
-    name: Create run
-    needs: [get-pr-number, get-pr-info]
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
-    permissions:
-      statuses: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Create Run
-        id: create_run
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          # Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`.
-          # See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status
-          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-        run: |
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/statuses/${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} \
-            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Custom doc building job" -f "context=custom-doc-build"
-
-  reply_to_comment:
-    name: Reply to the comment
-    if: ${{ needs.create_run.result == 'success' }}
-    needs: [get-pr-number, create_run]
-    permissions:
-      pull-requests: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Reply to the comment
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-        run: |
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-            -f "body=[Building docs for all languages...](${{ env.GITHUB_RUN_URL }})"
-
-  build-doc:
-    name: Build doc
-    needs: [get-pr-number, get-pr-info]
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
-    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
-    with:
-      commit_sha: ${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}
-      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
-      package: transformers
-      languages: ar de en es fr hi it ko pt tr zh ja te
-
-  update_run_status:
-    name: Update Check Run Status
-    needs: [ get-pr-info, create_run, build-doc ]
-    permissions:
-      statuses: write
-    if: ${{ always() && needs.create_run.result == 'success' }}
-    runs-on: ubuntu-22.04
-    env:
-      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.create_run.result) }}
-    steps:
-      - name: Get `build-doc` job status
-        run: |
-          echo "${{ needs.build-doc.result }}"
-          echo $STATUS_OK
-          if [ "$STATUS_OK" = "true" ]; then
-            echo "STATUS=success" >> $GITHUB_ENV
-          else
-            echo "STATUS=failure" >> $GITHUB_ENV
-          fi
-
-      - name: Update PR commit statuses
-        run: |
-          echo "${{ needs.build-doc.result }}"
-          echo "${{ env.STATUS }}"
-          gh api \
-            --method POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            repos/${{ github.repository }}/statuses/${{ needs.get-pr-info.outputs.PR_HEAD_SHA }} \
-            -f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Custom doc building job" -f "context=custom-doc-build"
--- a/.github/workflows/pr_run_slow_ci.yml
+++ b/.github/workflows/pr_run_slow_ci.yml
@ -1,177 +0,0 @@
-name: PR slow CI
-on:
-  pull_request_target:
-    types: [opened, synchronize, reopened]
-
-jobs:
-  get-pr-number:
-    name: Get PR number
-    uses: ./.github/workflows/get-pr-number.yml
-
-  get-pr-info:
-    name: Get PR commit SHA
-    needs: get-pr-number
-    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
-    uses: ./.github/workflows/get-pr-info.yml
-    with:
-      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}
-
-  get-jobs:
-    name: Get test files to run
-    runs-on: ubuntu-22.04
-    needs: [get-pr-number, get-pr-info]
-    outputs:
-      jobs: ${{ steps.get_jobs.outputs.jobs_to_run }}
-    steps:
-      - name: Get repository content
-        id: repo_content
-        uses: actions/github-script@v6
-        with:
-          script: |
-            const { data: tests_dir } = await github.rest.repos.getContent({
-              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
-              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
-              path: 'tests',
-              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
-            });
-
-            const { data: tests_models_dir } = await github.rest.repos.getContent({
-              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
-              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
-              path: 'tests/models',
-              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
-            });
-
-            const { data: tests_quantization_dir } = await github.rest.repos.getContent({
-              owner: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}',
-              repo: '${{ needs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}',
-              path: 'tests/quantization',
-              ref: '${{ needs.get-pr-info.outputs.PR_HEAD_SHA }}',
-            });
-
-            core.setOutput('tests_dir', tests_dir);
-            core.setOutput('tests_models_dir', tests_models_dir);
-            core.setOutput('tests_quantization_dir', tests_quantization_dir);
-
-      # This checkout to the main branch
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: "0"
-
-      - name: Write pr_files file
-        run: |
-          cat > pr_files.txt << 'EOF'
-          ${{ needs.get-pr-info.outputs.PR_FILES }}
-          EOF
-
-      - name: Write tests_dir file
-        run: |
-          cat > tests_dir.txt << 'EOF'
-          ${{ steps.repo_content.outputs.tests_dir }}
-          EOF
-
-      - name: Write tests_models_dir file
-        run: |
-          cat > tests_models_dir.txt << 'EOF'
-          ${{ steps.repo_content.outputs.tests_models_dir }}
-          EOF
-
-      - name: Write tests_quantization_dir file
-        run: |
-          cat > tests_quantization_dir.txt << 'EOF'
-          ${{ steps.repo_content.outputs.tests_quantization_dir }}
-          EOF
-
-      - name: Run script to get jobs to run
-        id: get_jobs
-        run: |
-          python utils/get_pr_run_slow_jobs.py | tee output.txt
-          echo "jobs_to_run: $(tail -n 1 output.txt)"
-          echo "jobs_to_run=$(tail -n 1 output.txt)" >> $GITHUB_OUTPUT
-
-  send_comment:
-    # Will delete the previous comment and send a new one if:
-    #   - either the content is changed
-    #   - or the previous comment is 30 minutes or more old
-    name: Send a comment to suggest jobs to run
-    if: ${{ needs.get-jobs.outputs.jobs != '' }}
-    needs: [get-pr-number, get-jobs]
-    permissions:
-      pull-requests: write
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Check and update comment if needed
-        uses: actions/github-script@v7
-        env:
-          BODY: "\n\nrun-slow: ${{ needs.get-jobs.outputs.jobs }}"
-        with:
-          script: |
-            const prNumber = ${{ needs.get-pr-number.outputs.PR_NUMBER }};
-            const commentPrefix = "**[For maintainers]** Suggested jobs to run (before merge)";
-            const thirtyMinutesAgo = new Date(Date.now() - 30 * 60 * 1000); // 30 minutes ago
-            const newBody = `${commentPrefix}${process.env.BODY}`;
-            
-            // Get all comments on the PR
-            const { data: comments } = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: prNumber
-            });
-            
-            // Find existing comments that start with our prefix
-            const existingComments = comments.filter(comment => 
-              comment.user.login === 'github-actions[bot]' && 
-              comment.body.startsWith(commentPrefix)
-            );
-            
-            let shouldCreateNewComment = true;
-            let commentsToDelete = [];
-            
-            if (existingComments.length > 0) {
-              // Get the most recent comment
-              const mostRecentComment = existingComments
-                .sort((a, b) => new Date(b.created_at) - new Date(a.created_at))[0];
-              
-              const commentDate = new Date(mostRecentComment.created_at);
-              const isOld = commentDate < thirtyMinutesAgo;
-              const isDifferentContent = mostRecentComment.body !== newBody;
-              
-              console.log(`Most recent comment created: ${mostRecentComment.created_at}`);
-              console.log(`Is older than 30 minutes: ${isOld}`);
-              console.log(`Has different content: ${isDifferentContent}`);
-              
-              if (isOld || isDifferentContent) {
-                // Delete all existing comments and create new one
-                commentsToDelete = existingComments;
-                console.log(`Will delete ${commentsToDelete.length} existing comment(s) and create new one`);
-              } else {
-                // Content is same and comment is recent, skip
-                shouldCreateNewComment = false;
-                console.log('Comment is recent and content unchanged, skipping update');
-              }
-            } else {
-              console.log('No existing comments found, will create new one');
-            }
-            
-            // Delete old comments if needed
-            for (const comment of commentsToDelete) {
-              console.log(`Deleting comment #${comment.id} (created: ${comment.created_at})`);
-              await github.rest.issues.deleteComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: comment.id
-              });
-            }
-            
-            // Create new comment if needed
-            if (shouldCreateNewComment) {
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: prNumber,
-                body: newBody
-              });
-              console.log('✅ New comment created');
-            } else {
-              console.log('ℹ️ No comment update needed');
-            }
--- a/.github/workflows/push-important-models.yml
+++ b/.github/workflows/push-important-models.yml
@ -7,13 +7,14 @@ on:
 env:
  OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: "yes"
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: "yes" # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1

 jobs:
  get_modified_models:
@ -24,13 +25,13 @@ jobs:
    steps:
      - name: Check out code
        uses: actions/checkout@v4
-
+      
      - name: Get changed files
        id: changed-files
-        uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
+        uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
        with:
          files: src/transformers/models/**
-
+      
      - name: Run step if only the files listed above change
        if: steps.changed-files.outputs.any_changed == 'true'
        id: set-matrix
@ -59,41 +60,41 @@ jobs:
    if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }}
    strategy:
      fail-fast: false
-      matrix:
+      matrix:
        model-name: ${{ fromJson(needs.get_modified_models.outputs.matrix) }}

    steps:
      - name: Check out code
        uses: actions/checkout@v4
-
+      
      - name: Install locally transformers & other libs
        run: |
          apt install sudo
          sudo -H pip install --upgrade pip
-          sudo -H pip uninstall -y transformers
-          sudo -H pip install -U -e ".[testing]"
+          sudo -H pip uninstall -y transformers
+          sudo -H pip install -U -e ".[testing]"
          MAX_JOBS=4 pip install flash-attn --no-build-isolation
          pip install bitsandbytes
-
+      
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
-
+      
      - name: Show installed libraries and their versions
        run: pip freeze
-
+      
      - name: Run FA2 tests
        id: run_fa2_tests
        run:
          pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_*
-
+      
      - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.model-name }}_fa2_tests
          path: /transformers/reports/${{ matrix.model-name }}_fa2_tests
-
+      
      - name: Post to Slack
        if: always()
        uses: huggingface/hf-workflows/.github/actions/post-slack@main
@ -102,13 +103,13 @@ jobs:
          title: 🤗 Results of the FA2 tests - ${{ matrix.model-name }}
          status: ${{ steps.run_fa2_tests.conclusion}}
          slack_token: ${{ secrets.CI_SLACK_BOT_TOKEN }}
-
+      
      - name: Run integration tests
        id: run_integration_tests
        if: always()
        run:
          pytest -rsfE -k "IntegrationTest"  --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_*
-
+      
      - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
@ -118,7 +119,7 @@ jobs:

      - name: Post to Slack
        if: always()
-        uses: huggingface/hf-workflows/.github/actions/post-slack@main
+        uses: huggingface/hf-workflows/.github/actions/post-slack@main
        with:
          slack_channel: ${{ env.OUTPUT_SLACK_CHANNEL_ID }}
          title: 🤗 Results of the Integration tests - ${{ matrix.model-name }}
--- a/.github/workflows/self-comment-ci.yml
+++ b/.github/workflows/self-comment-ci.yml
@ -22,6 +22,7 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

 jobs:
@ -29,7 +30,7 @@ jobs:
    runs-on: ubuntu-22.04
    name: Get PR number
    # For security: only allow team members to run
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "muellerzr", "eustlb", "MekkCyber", "manueldeprada", "vasqu", "ivarflakstad", "stevhliu", "ebezzam"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
+    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
    outputs:
      PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
    steps:
@ -97,7 +98,6 @@ jobs:
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}}
    outputs:
      models: ${{ steps.models_to_run.outputs.models }}
-      quantizations: ${{ steps.models_to_run.outputs.quantizations }}
    steps:
      - uses: actions/checkout@v4
        with:
@ -121,8 +121,6 @@ jobs:
          python -m pip install GitPython
          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt
          echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV
-          python utils/pr_slow_ci_models.py --message "$PR_COMMENT" --quantization | tee output2.txt
-          echo "quantizations=$(tail -n 1 output2.txt)" >> $GITHUB_ENV

      - name: Show models to test
        id: models_to_run
@ -130,12 +128,10 @@ jobs:
          echo "${{ env.models }}"
          echo "models=${{ env.models }}" >> $GITHUB_ENV
          echo "models=${{ env.models }}" >> $GITHUB_OUTPUT
-          echo "${{ env.quantizations }}"
-          echo "quantizations=${{ env.quantizations }}" >> $GITHUB_OUTPUT

  reply_to_comment:
    name: Reply to the comment
-    if: ${{ needs.get-tests.outputs.models != '[]'  || needs.get-tests.outputs.quantizations != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]' }}
    needs: [get-pr-number, get-tests]
    permissions:
      pull-requests: write
@ -145,18 +141,17 @@ jobs:
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          MODELS: ${{ needs.get-tests.outputs.models }}
-          BODY: "\n\nmodels: ${{ needs.get-tests.outputs.models }}\nquantizations: ${{ needs.get-tests.outputs.quantizations }}"
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \
-            -f "body=This comment contains run-slow, running the specified jobs: ${{ env.BODY }} ..."
+            -f "body=This comment contains run-slow, running the specified jobs: $MODELS ..."

  create_run:
    name: Create run
-    if: ${{ needs.get-tests.outputs.models != '[]' || needs.get-tests.outputs.quantizations != '[]' }}
+    if: ${{ needs.get-tests.outputs.models != '[]' }}
    needs: [get-sha, get-tests, reply_to_comment]
    permissions:
      statuses: write
@ -178,20 +173,20 @@ jobs:
            -f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests"

  run_models_gpu:
-    name: Run all tests for the model
-    if: ${{ needs.get-tests.outputs.models != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
-    runs-on:
-       group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
+    name: Run all tests for the model
+    if: ${{ needs.get-tests.outputs.models != '[]' }}
+    needs: [get-pr-number, get-sha, get-tests, create_run]
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.get-tests.outputs.models) }}
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+    runs-on:
+      group: '${{ matrix.machine_type }}'
+    container:
+      image: huggingface/transformers-all-latest-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
      - name: Echo input and matrix info
        shell: bash
        run: |
@ -211,20 +206,20 @@ jobs:
      - name: Checkout to PR merge commit
        working-directory: /transformers
        run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
+          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
+          git log -1 --format=%H

      - name: Verify merge commit SHA
        env:
          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
        working-directory: /transformers
        run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
+          PR_MERGE_SHA=$(git log -1 --format=%H)
+          if [ "$PR_MERGE_SHA" != "$VERIFIED_PR_MERGE_SHA" ]; then  # quoted: an empty verified SHA must abort, not make `[` error out and skip the check
+            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
+            exit 1;
+          fi

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
@ -239,9 +234,9 @@ jobs:
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"
-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -284,106 +279,9 @@ jobs:
          name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

-  run_quantization_torch_gpu:
-    name: Run all tests for a quantization
-    if: ${{ needs.get-tests.outputs.quantizations != '[]' }}
-    needs: [get-pr-number, get-sha, get-tests, create_run]
-    strategy:
-      fail-fast: false
-      matrix:
-        folders: ${{ fromJson(needs.get-tests.outputs.quantizations) }}
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
-    runs-on:
-      group: '${{ matrix.machine_type }}'
-    container:
-      image: huggingface/transformers-quantization-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
-    steps:
-      - name: Echo folder ${{ matrix.folders }}
-        shell: bash
-        run: |
-          echo "${{ matrix.folders }}"
-          matrix_folders=${{ matrix.folders }}
-          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
-          echo "$matrix_folders"
-          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
-
-      - name: Checkout to PR merge commit
-        working-directory: /transformers
-        run: |
-          git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge
-          git log -1 --format=%H
-
-      - name: Verify merge commit SHA
-        env:
-          VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }}
-        working-directory: /transformers
-        run: |
-          PR_MERGE_SHA=$(git log -1 --format=%H)
-          if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then
-            echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!";
-            exit -1;
-          fi
-
-      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
-        working-directory: /transformers
-        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
-      - name: NVIDIA-SMI
-        run: |
-          nvidia-smi
-
-      - name: Set `machine_type` for report and artifact names
-        working-directory: /transformers
-        shell: bash
-        run: |
-          echo "${{ matrix.machine_type }}"
-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "$machine_type"
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Environment
-        working-directory: /transformers
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        working-directory: /transformers
-        run: pip freeze
-
-      - name: Run quantization tests on GPU
-        working-directory: /transformers
-        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }}
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt
-
-      - name: Make sure report directory exists
-        shell: bash
-        run: |
-          mkdir -p /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports
-          echo "hello" > /transformers/reports/${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports/hello.txt
-          echo "${{ env.machine_type }}_run_quantization_gpu_${{ matrix.folders }}_test_reports"
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
-          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
-
  update_run_status:
    name: Update Check Run Status
-    needs: [get-sha, create_run, run_models_gpu, run_quantization_torch_gpu]
+    needs: [get-sha, create_run, run_models_gpu]
    permissions:
      statuses: write
    if: ${{ always() && needs.create_run.result == 'success' }}
@ -391,17 +289,16 @@ jobs:
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
-      STATUS_OK: ${{ contains(fromJSON('["skipped", "success"]'), needs.run_models_gpu.result) && contains(fromJSON('["skipped", "success"]'), needs.run_quantization_torch_gpu.result) }}
    steps:
      - name: Get `run_models_gpu` job status
        run: |
          echo "${{ needs.run_models_gpu.result }}"
-          echo "${{ needs.run_quantization_torch_gpu.result }}"
-          echo $STATUS_OK
-          if [ "$STATUS_OK" = "true" ]; then
+          if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then
+            echo "STATUS=failure" >> $GITHUB_ENV
+          elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then
            echo "STATUS=success" >> $GITHUB_ENV
          else
-            echo "STATUS=failure" >> $GITHUB_ENV
+            echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV
          fi

      - name: Update PR commit statuses
--- a/.github/workflows/self-push-amd-mi300-caller.yml
+++ b/.github/workflows/self-push-amd-mi300-caller.yml
@ -0,0 +1,25 @@
+name: Self-hosted runner (AMD mi300 CI caller)
+
+on:  # Only the `push` trigger is active; the `workflow_run` trigger below is commented out.
+  #workflow_run:
+  #  workflows: ["Self-hosted runner (push-caller)"]
+  #  branches: ["main"]
+  #  types: [completed]
+  push:
+    branches:  # Branch-name filter for push events.
+      - run_amd_push_ci_caller*
+    paths:  # Path filter for push events.
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+      - "utils/**"
+
+jobs:
+  run_amd_ci:  # Calls the reusable workflow self-push-amd.yml with gpu_flavor set to mi300.
+    name: AMD mi300
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))  # NOTE(review): with workflow_run commented out and push limited to run_amd_push_ci_caller*, the workflow_run and 'mi300-ci' clauses look unreachable - confirm
+    uses: ./.github/workflows/self-push-amd.yml
+    with:
+      gpu_flavor: mi300
+    secrets: inherit  # Forwards the caller's secrets to the called workflow.
--- a/.github/workflows/self-push-amd.yml
+++ b/.github/workflows/self-push-amd.yml
@ -14,6 +14,7 @@ env:
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

 jobs:
--- a/.github/workflows/self-push-caller.yml
+++ b/.github/workflows/self-push-caller.yml
@ -25,7 +25,7 @@ jobs:
        
        - name: Get changed files
          id: changed-files
-          uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c
+          uses: tj-actions/changed-files@1c8e6069583811afb28f97afeaf8e7da80c6be5c  # keep pinned to a full commit SHA; version tags are mutable
        
        - name: Was setup changed 
          id: was_changed
@ -51,4 +51,4 @@ jobs:
    needs: build-docker-containers
    steps:
      - name: Trigger push CI via workflow_run
-        run: echo "Trigger push CI via workflow_run"
+        run: echo "Trigger push CI via workflow_run"
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@ -24,6 +24,7 @@ env:
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1

 jobs:
@ -31,12 +32,12 @@ jobs:
    name: Setup
    strategy:
      matrix:
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
@ -131,12 +132,12 @@ jobs:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
-        machine_type: [aws-g5-4xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu-push-ci
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # For the meaning of these environment variables, see the job `Setup`
      CI_BRANCH_PUSH: ${{ github.event.ref }}
@ -169,9 +170,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -244,7 +245,7 @@ jobs:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
-        machine_type: [aws-g5-12xlarge-cache]
+        machine_type: [aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
@ -282,9 +283,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -292,7 +293,7 @ jobs:

          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
@ -357,12 +358,12 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [aws-g5-4xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # For the meaning of these environment variables, see the job `Setup`
      CI_BRANCH_PUSH: ${{ github.event.ref }}
@ -395,9 +396,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -405,7 +406,7 @ jobs:

          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Update clone using environment variables
        working-directory: /workspace/transformers
        run: |
@ -467,7 +468,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [aws-g5-12xlarge-cache]
+        machine_type: [aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
@ -505,9 +506,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -515,7 +516,7 @ jobs:

          echo "$machine_type"
          echo "machine_type=$machine_type" >> $GITHUB_ENV

      - name: Update clone using environment variables
        working-directory: /workspace/transformers
        run: |
@ -647,6 +648,6 @ jobs:
        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
        run: |
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
--- a/.github/workflows/self-scheduled-amd-mi210-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml
@ -0,0 +1,64 @
+# Scheduled AMD CI for the `mi210` runner. Every job calls the reusable workflow
+# `transformers_amd_ci_scheduled.yaml` from huggingface/hf-workflows and selects a different `job`.
+name: Self-hosted runner (AMD mi210 scheduled CI caller)
+
+on:
+  workflow_run:
+    workflows:
+      - "Self-hosted runner (AMD scheduled CI caller)"
+    branches:
+      - main
+    types:
+      - completed
+  push:
+    branches:
+      - "run_amd_scheduled_ci_caller*"
+
+jobs:
+  # Selects the `run_models_gpu` job of the reusable workflow.
+  model-ci:
+    name: Model CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
+    secrets: inherit
+    with:
+      job: run_models_gpu
+      runner: mi210
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: "Scheduled CI (AMD) - mi210"
+      slack_report_channel: "#transformers-ci-daily-amd"
+
+  # Selects the `run_pipelines_torch_gpu` job of the reusable workflow.
+  torch-pipeline:
+    name: Torch pipeline CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
+    secrets: inherit
+    with:
+      job: run_pipelines_torch_gpu
+      runner: mi210
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: "Scheduled CI (AMD) - mi210"
+      slack_report_channel: "#transformers-ci-daily-amd"
+
+  # Selects the `run_examples_gpu` job of the reusable workflow.
+  example-ci:
+    name: Example CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
+    secrets: inherit
+    with:
+      job: run_examples_gpu
+      runner: mi210
+      docker: huggingface/transformers-pytorch-amd-gpu
+      ci_event: "Scheduled CI (AMD) - mi210"
+      slack_report_channel: "#transformers-ci-daily-amd"
+
+  # Selects the `run_torch_cuda_extensions_gpu` job; the only job here that uses the DeepSpeed AMD image.
+  deepspeed-ci:
+    name: DeepSpeed CI
+    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main
+    secrets: inherit
+    with:
+      job: run_torch_cuda_extensions_gpu
+      runner: mi210
+      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
+      ci_event: "Scheduled CI (AMD) - mi210"
+      slack_report_channel: "#transformers-ci-daily-amd"
--- a/.github/workflows/self-scheduled-amd-mi250-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml
@ -19,7 +19,6 @@ jobs:
      runner: mi250
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit

  torch-pipeline:
@ -31,7 +30,6 @@ jobs:
      runner: mi250
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit

  example-ci:
@ -43,7 +41,6 @@ jobs:
      runner: mi250
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit

  deepspeed-ci:
@ -55,5 +52,4 @@ jobs:
      runner: mi250
      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
      ci_event: Scheduled CI (AMD) - mi250
-      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi325-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi325-caller.yml
@ -1,67 +0,0 @@
-name: Self-hosted runner scale set (AMD mi325 scheduled CI caller)
-
-# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
-# For example, 1gpu scale set: amd-mi325-ci-1gpu
-#              2gpu scale set: amd-mi325-ci-2gpu
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi325-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi325
-      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi325-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi325
-      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi325-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi325
-      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi325-ci
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi325
-      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
-    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi355-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi355-caller.yml
@ -1,63 +0,0 @@
-name: Self-hosted runner scale set (AMD mi355 scheduled CI caller)
-
-# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
-# For example, 1gpu : amd-mi355-ci-1gpu
-#              2gpu : amd-mi355-ci-2gpu
-
-on:
-  workflow_run:
-    workflows: ["Self-hosted runner (AMD scheduled CI caller)"]
-    branches: ["main"]
-    types: [completed]
-  push:
-    branches:
-      - run_amd_scheduled_ci_caller*
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_models_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  torch-pipeline:
-    name: Torch pipeline CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_pipelines_torch_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
-      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml@main
-    with:
-      job: run_torch_cuda_extensions_gpu
-      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
-      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
-      report_repo_id: optimum-amd/transformers_daily_ci
-    secrets: inherit
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@ -7,51 +7,72 @@ on:
    - cron: "17 2 * * *"
  push:
    branches:
-      - check_fa2
-  workflow_dispatch:
-    inputs:
-      prev_workflow_run_id:
-        description: 'previous workflow run id to compare'
-        type: string
-        required: false
-        default: ""
-      other_workflow_run_id:
-        description: 'other workflow run id to compare'
-        type: string
-        required: false
-        default: ""
-
-
-# Used for `push` to easily modify the target workflow runs to compare against
-env:
-    prev_workflow_run_id: ""
-    other_workflow_run_id: ""
-
+      - run_scheduled_ci*

 jobs:
-  setup:
-    name: Setup
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Setup
-        run: |
-          mkdir "setup_values"
-          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
-          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: setup_values
-          path: setup_values
-
  model-ci:
    name: Model CI
    uses: ./.github/workflows/self-scheduled.yml
    with:
      job: run_models_gpu
-      slack_report_channel: "#transformers-ci-dummy"
+      slack_report_channel: "#transformers-ci-daily-models"
+      runner: daily-ci
      docker: huggingface/transformers-all-latest-gpu
      ci_event: Daily CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci
+    secrets: inherit
+
+  torch-pipeline:
+    name: Torch pipeline CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_pipelines_torch_gpu
+      slack_report_channel: "#transformers-ci-daily-pipeline-torch"
+      runner: daily-ci
+      docker: huggingface/transformers-pytorch-gpu
+      ci_event: Daily CI
+    secrets: inherit
+
+  tf-pipeline:
+    name: TF pipeline CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_pipelines_tf_gpu
+      slack_report_channel: "#transformers-ci-daily-pipeline-tf"
+      runner: daily-ci
+      docker: huggingface/transformers-tensorflow-gpu
+      ci_event: Daily CI
+    secrets: inherit
+
+  example-ci:
+    name: Example CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_examples_gpu
+      slack_report_channel: "#transformers-ci-daily-examples"
+      runner: daily-ci
+      docker: huggingface/transformers-all-latest-gpu
+      ci_event: Daily CI
+    secrets: inherit
+
+  deepspeed-ci:
+    name: DeepSpeed CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_torch_cuda_extensions_gpu
+      slack_report_channel: "#transformers-ci-daily-deepspeed"
+      runner: daily-ci
+      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
+      ci_event: Daily CI
+      working-directory-prefix: /workspace
+    secrets: inherit
+
+  quantization-ci:
+    name: Quantization CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_quantization_torch_gpu
+      slack_report_channel: "#transformers-ci-daily-quantization"
+      runner: daily-ci
+      docker: huggingface/transformers-quantization-latest-gpu
+      ci_event: Daily CI
    secrets: inherit
--- a/.github/workflows/self-scheduled-intel-gaudi.yml
+++ b/.github/workflows/self-scheduled-intel-gaudi.yml
@ -1,342 +0,0 @@
-name: Self-hosted runner (scheduled-intel-gaudi)
-
-on:
-  workflow_call:
-    inputs:
-      job:
-        required: true
-        type: string
-      slack_report_channel:
-        required: true
-        type: string
-      runner_scale_set:
-        required: true
-        type: string
-      ci_event:
-        required: true
-        type: string
-      report_repo_id:
-        required: true
-        type: string
-
-env:
-  NUM_SLICES: 2
-  RUN_SLOW: yes
-  PT_HPU_LAZY_MODE: 0
-  TRANSFORMERS_IS_CI: yes
-  PT_ENABLE_INT64_SUPPORT: 1
-  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  HF_HOME: /mnt/cache/.cache/huggingface
-
-jobs:
-  setup:
-    if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
-    name: Setup
-    runs-on: ubuntu-latest
-    outputs:
-      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
-      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
-      quantization_matrix: ${{ steps.set-matrix.outputs.quantization_matrix }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-
-      - id: set-matrix
-        if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
-        name: Identify models to test
-        working-directory: tests
-        run: |
-          if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
-            echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
-            echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
-          elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
-            echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
-            echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
-          fi
-
-      - id: set-matrix-quantization
-        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
-        name: Identify quantization method to test
-        working-directory: tests
-        run: |
-          echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ;  print(d)')" >> $GITHUB_OUTPUT
-
-  run_models_gpu:
-    if: ${{ inputs.job == 'run_models_gpu' }}
-    name: " "
-    needs: setup
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [1gaudi, 2gaudi]
-        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
-    uses: ./.github/workflows/model_jobs_intel_gaudi.yml
-    with:
-      slice_id: ${{ matrix.slice_id }}
-      machine_type: ${{ matrix.machine_type }}
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
-    secrets: inherit
-
-  run_trainer_and_fsdp_gpu:
-    if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
-    name: " "
-    needs: setup
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [1gaudi, 2gaudi]
-        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
-    uses: ./.github/workflows/model_jobs_intel_gaudi.yml
-    with:
-      slice_id: ${{ matrix.slice_id }}
-      machine_type: ${{ matrix.machine_type }}
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      runner: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
-      report_name_prefix: run_trainer_and_fsdp_gpu
-    secrets: inherit
-
-  run_pipelines_torch_gpu:
-    if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
-    name: Pipelines
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [1gaudi, 2gaudi]
-    runs-on:
-      group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
-    container:
-      image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
-      options: --runtime=habana
-        -v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
-        --env OMPI_MCA_btl_vader_single_copy_mechanism=none
-        --env HABANA_VISIBLE_DEVICES
-        --env HABANA_VISIBLE_MODULES
-        --cap-add=sys_nice
-        --shm-size=64G
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install dependencies
-        run: |
-          pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
-
-      - name: HL-SMI
-        run: |
-          hl-smi
-          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
-          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
-
-      - name: Environment
-        run: python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        run: pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        shell: bash
-        run: |
-          if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run all pipeline tests on Intel Gaudi
-        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: |
-          cat reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
-          path: reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
-
-  run_examples_gpu:
-    if: ${{ inputs.job == 'run_examples_gpu' }}
-    name: Examples directory
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [1gaudi]
-    runs-on:
-      group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
-    container:
-      image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
-      options: --runtime=habana
-        -v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
-        --env OMPI_MCA_btl_vader_single_copy_mechanism=none
-        --env HABANA_VISIBLE_DEVICES
-        --env HABANA_VISIBLE_MODULES
-        --cap-add=sys_nice
-        --shm-size=64G
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install dependencies
-        run: |
-          pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
-
-      - name: HL-SMI
-        run: |
-          hl-smi
-          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
-          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
-
-      - name: Environment
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        run: |
-          pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        shell: bash
-        run: |
-          if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run examples tests on Intel Gaudi
-        run: |
-          pip install -r examples/pytorch/_tests_requirements.txt
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: |
-          cat reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_examples_gpu_test_reports
-          path: reports/${{ env.machine_type }}_run_examples_gpu_test_reports
-
-  run_torch_cuda_extensions_gpu:
-    if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }}
-    name: Intel Gaudi deepspeed tests
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [1gaudi, 2gaudi]
-    runs-on:
-      group: ${{ inputs.runner_scale_set }}-${{ matrix.machine_type }}
-    container:
-      image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
-      options: --runtime=habana
-        -v /mnt/cache/.cache/huggingface:/mnt/cache/.cache/huggingface
-        --env OMPI_MCA_btl_vader_single_copy_mechanism=none
-        --env HABANA_VISIBLE_DEVICES
-        --env HABANA_VISIBLE_MODULES
-        --cap-add=sys_nice
-        --shm-size=64G
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install dependencies
-        run: |
-          pip install -e .[testing,torch] "numpy<2.0.0" scipy scikit-learn librosa soundfile
-          pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.20.0
-
-      - name: HL-SMI
-        run: |
-          hl-smi
-          echo "HABANA_VISIBLE_DEVICES=${HABANA_VISIBLE_DEVICES}"
-          echo "HABANA_VISIBLE_MODULES=${HABANA_VISIBLE_MODULES}"
-
-      - name: Environment
-        run: |
-          python3 utils/print_env.py
-
-      - name: Show installed libraries and their versions
-        run: |
-          pip freeze
-
-      - name: Set `machine_type` for report and artifact names
-        shell: bash
-        run: |
-          if [ "${{ matrix.machine_type }}" = "1gaudi" ]; then
-            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "2gaudi" ]; then
-            machine_type=multi-gpu
-          else
-            machine_type=${{ matrix.machine_type }}
-          fi
-          echo "machine_type=$machine_type" >> $GITHUB_ENV
-
-      - name: Run all deepspeed tests on intel Gaudi
-        run: |
-          python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed -m "not not_device_test"
-
-      - name: Failure short reports
-        if: ${{ failure() }}
-        continue-on-error: true
-        run: |
-          cat reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt
-
-      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports"
-        if: ${{ always() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-          path: reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports
-
-  send_results:
-    name: Slack Report
-    needs:
-      [
-        setup,
-        run_models_gpu,
-        run_examples_gpu,
-        run_torch_cuda_extensions_gpu,
-        run_pipelines_torch_gpu,
-        run_trainer_and_fsdp_gpu,
-      ]
-    if: ${{ always() }}
-    uses: ./.github/workflows/slack-report.yml
-    with:
-      job: ${{ inputs.job }}
-      setup_status: ${{ needs.setup.result }}
-      slack_report_channel: ${{ inputs.slack_report_channel }}
-      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      report_repo_id: ${{ inputs.report_repo_id }}
-      ci_event: ${{ inputs.ci_event }}
-
-    secrets: inherit
--- a/.github/workflows/self-scheduled-intel-gaudi3-caller.yml
+++ b/.github/workflows/self-scheduled-intel-gaudi3-caller.yml
@ -1,67 +0,0 @@
-name: Self-hosted runner (Intel Gaudi3 scheduled CI caller)
-
-on:
-  repository_dispatch:
-  workflow_dispatch:
-  schedule:
-    - cron: "17 2 * * *"
-
-jobs:
-  model-ci:
-    name: Model CI
-    uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
-    with:
-      job: run_models_gpu
-      ci_event: Scheduled CI (Intel) - Gaudi3
-      runner_scale_set: itac-bm-emr-gaudi3-dell
-      slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
-      report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
-
-    secrets: inherit
-
-  pipeline-ci:
-    name: Pipeline CI
-    uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
-    with:
-      job: run_pipelines_torch_gpu
-      ci_event: Scheduled CI (Intel) - Gaudi3
-      runner_scale_set: itac-bm-emr-gaudi3-dell
-      slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
-      report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
-
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
-    with:
-      job: run_examples_gpu
-      ci_event: Scheduled CI (Intel) - Gaudi3
-      runner_scale_set: itac-bm-emr-gaudi3-dell
-      slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
-      report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
-
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
-    with:
-      job: run_torch_cuda_extensions_gpu
-      ci_event: Scheduled CI (Intel) - Gaudi3
-      runner_scale_set: itac-bm-emr-gaudi3-dell
-      slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
-      report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
-
-    secrets: inherit
-
-  trainer-fsdp-ci:
-    name: Trainer/FSDP CI
-    uses: ./.github/workflows/self-scheduled-intel-gaudi.yml
-    with:
-      job: run_trainer_and_fsdp_gpu
-      ci_event: Scheduled CI (Intel) - Gaudi3
-      runner_scale_set: itac-bm-emr-gaudi3-dell
-      slack_report_channel: "#transformers-ci-daily-intel-gaudi3"
-      report_repo_id: optimum-intel/transformers_daily_ci_intel_gaudi3
-    secrets: inherit
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@ -15,6 +15,9 @@ on:
      slack_report_channel:
        required: true
        type: string
+      runner:
+        required: true
+        type: string
      docker:
        required: true
        type: string
@ -25,10 +28,6 @@ on:
        default: ''
        required: false
        type: string
-      report_repo_id:
-        required: true
-        type: string
-

 env:
  HF_HOME: /mnt/cache
@ -41,25 +40,25 @@ env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
  CUDA_VISIBLE_DEVICES: 0,1
  NUM_SLICES: 2

 jobs:
  setup:
-    if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
+    if: contains(fromJSON('["run_models_gpu", "run_quantization_torch_gpu"]'), inputs.job)
    name: Setup
    strategy:
      matrix:
-        machine_type: [aws-g5-4xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
-      runner_map: ${{ steps.set-matrix.outputs.runner_map }}
      quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
    steps:
      - name: Update clone
@ -79,18 +78,12 @@ jobs:
        run: pip freeze

      - id: set-matrix
-        if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu"]'), inputs.job)
+        if: ${{ inputs.job == 'run_models_gpu' }}
        name: Identify models to test
        working-directory: /transformers/tests
        run: |
-          if [ "${{ inputs.job }}" = "run_models_gpu" ]; then
-            echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
-            echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
-            echo "runner_map=$(python3 ../utils/get_runner_map.py)" >> $GITHUB_OUTPUT
-          elif [ "${{ inputs.job }}" = "run_trainer_and_fsdp_gpu" ]; then
-            echo "folder_slices=[['trainer'], ['fsdp']]" >> $GITHUB_OUTPUT
-            echo "slice_ids=[0, 1]" >> $GITHUB_OUTPUT
-          fi
+          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
+          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT

      - id: set-matrix-quantization
        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
@ -110,43 +103,24 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [single-gpu]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
        slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }}
    uses: ./.github/workflows/model_jobs.yml
    with:
      folder_slices: ${{ needs.setup.outputs.folder_slices }}
      machine_type: ${{ matrix.machine_type }}
      slice_id: ${{ matrix.slice_id }}
-      runner_map: ${{ needs.setup.outputs.runner_map }}
+      runner: ${{ inputs.runner }}
      docker: ${{ inputs.docker }}
    secrets: inherit

-  run_trainer_and_fsdp_gpu:
-    if: ${{ inputs.job == 'run_trainer_and_fsdp_gpu' }}
-    name: " "
-    needs: setup
-    strategy:
-      fail-fast: false
-      matrix:
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
-        slice_id: [0, 1]
-    uses: ./.github/workflows/model_jobs.yml
-    with:
-      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-      machine_type: ${{ matrix.machine_type }}
-      slice_id: ${{ matrix.slice_id }}
-      runner_map: ${{ needs.setup.outputs.runner_map }}
-      docker: ${{ inputs.docker }}
-      report_name_prefix: run_trainer_and_fsdp_gpu
-    secrets: inherit
-
  run_pipelines_torch_gpu:
    if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
    name: PyTorch pipelines
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
@ -180,9 +154,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -208,18 +182,87 @@ jobs:
          name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports

+  run_pipelines_tf_gpu:
+    if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
+    name: TensorFlow pipelines
+    strategy:
+      fail-fast: false
+      matrix:
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
+    runs-on:
+      group: '${{ matrix.machine_type }}'
+    container:
+      image: huggingface/transformers-tensorflow-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: |
+          git fetch && git checkout ${{ github.sha }}
+
+      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
+        working-directory: /transformers
+        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Show installed libraries and their versions
+        working-directory: /transformers
+        run: pip freeze
+
+      - name: Set `machine_type` for report and artifact names
+        working-directory: /transformers
+        shell: bash
+        run: |
+          echo "${{ matrix.machine_type }}"
+
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
+            machine_type=single-gpu
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
+            machine_type=multi-gpu
+          else
+            machine_type=${{ matrix.machine_type }}
+          fi
+
+          echo "$machine_type"
+          echo "machine_type=$machine_type" >> $GITHUB_ENV
+
+      - name: Run all pipeline tests on GPU
+        working-directory: /transformers
+        run: |
+          python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines
+
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: |
+          cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt
+
+      - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports"
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
+          path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports
+
  run_examples_gpu:
    if: ${{ inputs.job == 'run_examples_gpu' }}
    name: Examples directory
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [aws-g5-4xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
      image: huggingface/transformers-all-latest-gpu
-      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
@ -248,9 +291,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -283,7 +326,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
@ -323,7 +366,7 @@ jobs:
        run: |
          python3 -m pip uninstall -y deepspeed
          rm -rf DeepSpeed
-          git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build
+          git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
          DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check

      - name: NVIDIA-SMI
@ -340,14 +383,14 @@ jobs:
        run: pip freeze

      - name: Set `machine_type` for report and artifact names
-        working-directory: ${{ inputs.working-directory-prefix }}/transformers
+        working-directory: /transformers
        shell: bash
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -382,7 +425,7 @@ jobs:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
-        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
+        machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache]
    runs-on:
      group: '${{ matrix.machine_type }}'
    container:
@ -425,9 +468,9 @@ jobs:
        run: |
          echo "${{ matrix.machine_type }}"

-          if [ "${{ matrix.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
+          if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then
            machine_type=single-gpu
-          elif [ "${{ matrix.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
+          elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
            machine_type=multi-gpu
          else
            machine_type=${{ matrix.machine_type }}
@ -452,3 +495,80 @@ jobs:
        with:
          name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports
+
+  run_extract_warnings:
+    # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic.
+    if: ${{ always() && inputs.job == 'run_models_gpu' }}
+    name: Extract warnings in CI artifacts
+    runs-on: ubuntu-22.04
+    needs: [setup, run_models_gpu]
+    steps:
+      - name: Checkout transformers
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 2
+
+      - name: Install transformers
+        run: pip install transformers
+
+      - name: Show installed libraries and their versions
+        run: pip freeze
+
+      - name: Create output directory
+        run: mkdir warnings_in_ci
+
+      - uses: actions/download-artifact@v4
+        with:
+          path: warnings_in_ci
+
+      - name: Show artifacts
+        run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')"
+        working-directory: warnings_in_ci
+
+      - name: Extract warnings in CI artifacts
+        run: |
+          python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh
+          echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')"
+
+      - name: Upload artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: warnings_in_ci
+          path: warnings_in_ci/selected_warnings.json
+
+  send_results:
+    name: Slack Report
+    needs: [
+      setup,
+      run_models_gpu,
+      run_pipelines_torch_gpu,
+      run_pipelines_tf_gpu,
+      run_examples_gpu,
+      run_torch_cuda_extensions_gpu,
+      run_quantization_torch_gpu,
+      run_extract_warnings
+    ]
+    if: ${{ always() }}
+    uses: ./.github/workflows/slack-report.yml
+    with:
+      job: ${{ inputs.job }}
+      # This would be `skipped` if `setup` is skipped.
+      setup_status: ${{ needs.setup.result }}
+      slack_report_channel: ${{ inputs.slack_report_channel }}
+      # This would be an empty string if `setup` is skipped.
+      folder_slices: ${{ needs.setup.outputs.folder_slices }}
+      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
+      ci_event: ${{ inputs.ci_event }}
+
+    secrets: inherit
+
+  check_new_model_failures:
+    if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
+    name: Check new model failures
+    needs: send_results
+    uses: ./.github/workflows/check_failed_model_tests.yml
+    with:
+      docker: ${{ inputs.docker }}
+      start_sha: ${{ github.sha }}
+    secrets: inherit
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@ -21,9 +21,6 @@ on:
      ci_event:
        required: true
        type: string
-      report_repo_id:
-        required: true
-        type: string

 env:
  TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@ -42,23 +39,8 @@ jobs:

      - uses: actions/checkout@v4
      - uses: actions/download-artifact@v4
-
-      - name: Prepare some setup values
-        run: |
-          if [ -f setup_values/prev_workflow_run_id.txt ]; then
-            echo "PREV_WORKFLOW_RUN_ID=$(cat setup_values/prev_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "PREV_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
-          if [ -f setup_values/other_workflow_run_id.txt ]; then
-            echo "OTHER_WORKFLOW_RUN_ID=$(cat setup_values/other_workflow_run_id.txt)" >> $GITHUB_ENV
-          else
-            echo "OTHER_WORKFLOW_RUN_ID=" >> $GITHUB_ENV
-          fi
-
      - name: Send message to Slack
-        shell: bash
+        if: ${{ inputs.job != 'run_quantization_torch_gpu' }}
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
@ -68,22 +50,19 @@ jobs:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: ${{ inputs.ci_event }}
          CI_SHA: ${{ github.sha }}
+          CI_WORKFLOW_REF: ${{ github.workflow_ref }}
          CI_TEST_JOB: ${{ inputs.job }}
          SETUP_STATUS: ${{ inputs.setup_status }}
-          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
        # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
        # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
        # For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
        # empty string, and the called script still get one argument (which is the emtpy string).
        run: |
+          sudo apt-get install -y curl
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
-          if [ "${{ inputs.quantization_matrix }}" != "" ]; then
-            python utils/notification_service.py "${{ inputs.quantization_matrix }}"
-          else
-            python utils/notification_service.py "${{ inputs.folder_slices }}"
-          fi          
+          python utils/notification_service.py "${{ inputs.folder_slices }}"

      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
      - name: Failure table artifacts
@ -91,3 +70,32 @@ jobs:
        with:
          name: ci_results_${{ inputs.job }}
          path: ci_results_${{ inputs.job }}
+
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v4
+      - name: Send message to Slack for quantization workflow
+        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
+          CI_EVENT: ${{ inputs.ci_event }}
+          CI_SHA: ${{ github.sha }}
+          CI_TEST_JOB: ${{ inputs.job }}
+          SETUP_STATUS: ${{ inputs.setup_status }}
+        # We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
+        # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
+        run: |
+          sudo apt-get install -y curl
+          pip install huggingface_hub
+          pip install slack_sdk
+          pip show slack_sdk
+          python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
+
+      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
+      - name: Failure table artifacts
+        if: ${{ inputs.job == 'run_quantization_torch_gpu' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: ci_results_${{ inputs.job }}
+          path: ci_results_${{ inputs.job }}
--- a/.github/workflows/ssh-runner.yml
+++ b/.github/workflows/ssh-runner.yml
@ -5,7 +5,7 @@ on:
    inputs:
      runner_type:
        description: 'Type of runner to test (a10 or t4)'
-        required: true
+        required: true 
      docker_image:
        description: 'Name of the Docker image'
        required: true
@ -15,14 +15,15 @@ on:

 env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-  HF_HOME: /mnt/cache
-  TRANSFORMERS_IS_CI: yes
-  OMP_NUM_THREADS: 8
-  MKL_NUM_THREADS: 8
-  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`.
-  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
-  TF_FORCE_GPU_ALLOW_GROWTH: true
+  HF_HOME: /mnt/cache 
+  TRANSFORMERS_IS_CI: yes 
+  OMP_NUM_THREADS: 8 
+  MKL_NUM_THREADS: 8 
+  RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. 
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} 
+  TF_FORCE_GPU_ALLOW_GROWTH: true 
  CUDA_VISIBLE_DEVICES: 0,1
+  RUN_PT_TF_CROSS_TESTS: 1

 jobs:
  get_runner:
@ -35,7 +36,7 @@ jobs:
        shell: bash
        run: |
          if [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
-            echo "RUNNER=aws-g4dn-4xlarge-cache" >> $GITHUB_ENV
+            echo "RUNNER=aws-g4dn-2xlarge-cache" >> $GITHUB_ENV
          elif [[ "${{ github.event.inputs.num_gpus }}" == "multi" && "${{ github.event.inputs.runner_type }}" == "t4" ]]; then
            echo "RUNNER=aws-g4dn-12xlarge-cache" >> $GITHUB_ENV
          elif [[ "${{ github.event.inputs.num_gpus }}" == "single" && "${{ github.event.inputs.runner_type }}" == "a10" ]]; then
@ -77,7 +78,7 @@ jobs:
      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze
-
+      
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
--- a/.github/workflows/trufflehog.yml
+++ b/.github/workflows/trufflehog.yml
@ -16,5 +16,3 @@ jobs:
          fetch-depth: 0
      - name: Secret Scanning
        uses: trufflesecurity/trufflehog@main
-        with:
-          extra_args: --results=verified,unknown
--- a/.github/workflows/update_metdata.yml
+++ b/.github/workflows/update_metdata.yml
@ -19,7 +19,7 @@ jobs:
      - name: Setup environment
        run: |
          pip install --upgrade pip
-          pip install datasets pandas
+          pip install datasets pandas==2.0.3
          pip install .[torch,tf,flax]

      - name: Update metadata
--- a/.gitignore
+++ b/.gitignore
@ -167,6 +167,3 @@ tags

 # ruff
 .ruff_cache
-
-# modular conversion
-*.modular_backup
--- a/AGENTS.md
+++ b/AGENTS.md
@ -1,39 +0,0 @@
-# AGENTS.md Guide for Hugging Face Transformers
-
-This AGENTS.md file provides guidance for code agents working with this codebase.
-
-## Core Project Structure
-
-- `/src/transformers`: This contains the core source code for the library
-  - `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
-- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
-  - `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
-- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
-
-## Coding Conventions for Hugging Face Transformers
-
-- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
-- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
-- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
-
-## Copying and inheritance
-
-Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
-We use two mechanisms to keep this code in sync:
-
-- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
-  These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
-  either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
-- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
-  automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
-  should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
-
-When adding new models, you should prefer `modular` style.
-
-## Testing
-
-After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
-the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`
-If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
-
-In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -78,7 +78,7 @@ Once you've confirmed the bug hasn't already been reported, please include the f
 To get the OS and software versions automatically, run the following command:

 ```bash
-transformers env
+transformers-cli env
 ```

 You can also run the same command from the root of the repository:
@ -221,10 +221,10 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
   [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.

   If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
-   make sure you install the [documentation builder](https://github.com/huggingface/doc-builder).
+   make sure you install the documentation builder:

   ```bash
-   pip install hf-doc-builder
+   pip install ".[docs]"
   ```

   Run the following command from the root of the repository:
@ -343,6 +343,8 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/t

 Like the slow tests, there are other environment variables available which are not enabled by default during testing:
 - `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
+- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
+- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.

 More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).

--- a/ISSUES.md
+++ b/ISSUES.md
@ -26,7 +26,7 @@ There are two main venues to receive support: [the forums](https://discuss.huggi

 [The user forums](https://discuss.huggingface.co/) are supported by the wide community of the library users and backed up by developers when needed.

-If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
+If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystalized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).

 In particular all "Please explain" questions or objectively very user-specific feature requests belong to the forums. Here are some example of such questions:

@ -263,9 +263,9 @@ You are not required to read the following guidelines before opening an issue. H
    But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying it. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like:

    ```
-    > How big is your GPU cluster?
+    > How big is your gpu cluster?

-    Our cluster is made of 256 GPUs.
+    Our cluster is made of 256 gpus.
    ```

    If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment.
--- a/26
+++ b/26
@ -8,19 +8,13 @@ check_dirs := examples tests src utils
 exclude_folders :=  ""

 modified_only_fixup:
-	@current_branch=$$(git branch --show-current); \
-	if [ "$$current_branch" = "main" ]; then \
-		echo "On main branch, running 'style' target instead..."; \
-		$(MAKE) style; \
+	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
+	@if test -n "$(modified_py_files)"; then \
+		echo "Checking/fixing $(modified_py_files)"; \
+		ruff check $(modified_py_files) --fix --exclude $(exclude_folders); \
+		ruff format $(modified_py_files) --exclude $(exclude_folders);\
 	else \
-		modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
-		if [ -n "$$modified_py_files" ]; then \
-			echo "Checking/fixing files: $${modified_py_files}"; \
-			ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
-			ruff format $${modified_py_files} --exclude $(exclude_folders); \
-		else \
-			echo "No library .py files were modified"; \
-		fi; \
+		echo "No library .py files were modified"; \
 	fi

 # Update src/transformers/dependency_versions_table.py
@ -43,16 +37,16 @@ autogenerate_code: deps_table_update
 repo-consistency:
 	python utils/check_copies.py
 	python utils/check_modular_conversion.py
+	python utils/check_table.py
 	python utils/check_dummies.py
 	python utils/check_repo.py
 	python utils/check_inits.py
-	python utils/check_pipeline_typing.py
 	python utils/check_config_docstrings.py
 	python utils/check_config_attributes.py
 	python utils/check_doctest_list.py
 	python utils/update_metadata.py --check-only
 	python utils/check_docstrings.py
-	python utils/add_dates.py
+	python utils/check_support_list.py

 # this target runs checks on all files

@ -87,9 +81,9 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency

 fix-copies:
 	python utils/check_copies.py --fix_and_overwrite
-	python utils/check_modular_conversion.py --fix_and_overwrite
+	python utils/check_modular_conversion.py  --fix_and_overwrite
+	python utils/check_table.py --fix_and_overwrite
 	python utils/check_dummies.py --fix_and_overwrite
-	python utils/check_pipeline_typing.py --fix_and_overwrite
 	python utils/check_doctest_list.py --fix_and_overwrite
 	python utils/check_docstrings.py --fix_and_overwrite

--- a/README.md
+++ b/README.md
@ -25,7 +25,6 @@ limitations under the License.
 </p>

 <p align="center">
-    <a href="https://huggingface.com/models"><img alt="Checkpoints on Hub" src="https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen"></a>
    <a href="https://circleci.com/gh/huggingface/transformers"><img alt="Build" src="https://img.shields.io/circleci/build/github/huggingface/transformers/main"></a>
    <a href="https://github.com/huggingface/transformers/blob/main/LICENSE"><img alt="GitHub" src="https://img.shields.io/github/license/huggingface/transformers.svg?color=blue"></a>
    <a href="https://huggingface.co/docs/transformers/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers/index.svg?down_color=red&down_message=offline&up_message=online"></a>
@ -44,7 +43,7 @@ limitations under the License.
        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ja.md">日本語</a> |
        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_hd.md">हिन्दी</a> |
        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ru.md">Русский</a> |
-        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
+        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_pt-br.md">Português</a> |
        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_te.md">తెలుగు</a> |
        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
        <a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
@ -55,268 +54,275 @@ limitations under the License.
 </h4>

 <h3 align="center">
-    <p>State-of-the-art pretrained models for inference and training</p>
+    <p>State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow</p>
 </h3>

 <h3 align="center">
-    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/transformers_as_a_model_definition.png"/>
+    <a href="https://hf.co/course"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/course_banner.png"></a>
 </h3>

+🤗 Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio.

-Transformers acts as the model-definition framework for state-of-the-art machine learning models in text, computer 
-vision, audio, video, and multimodal model, for both inference and training. 
+These models can be applied on:

-It centralizes the model definition so that this definition is agreed upon across the ecosystem. `transformers` is the 
-pivot across frameworks: if a model definition is supported, it will be compatible with the majority of training 
-frameworks (Axolotl, Unsloth, DeepSpeed, FSDP, PyTorch-Lightning, ...), inference engines (vLLM, SGLang, TGI, ...),
-and adjacent modeling libraries (llama.cpp, mlx, ...) which leverage the model definition from `transformers`.
+* 📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, and text generation, in over 100 languages.
+* 🖼️ Images, for tasks like image classification, object detection, and segmentation.
+* 🗣️ Audio, for tasks like speech recognition and audio classification.

-We pledge to help support new state-of-the-art models and democratize their usage by having their model definition be
-simple, customizable, and efficient.
+Transformer models can also perform tasks on **several modalities combined**, such as table question answering, optical character recognition, information extraction from scanned documents, video classification, and visual question answering.

-There are over 1M+ Transformers [model checkpoints](https://huggingface.co/models?library=transformers&sort=trending) on the [Hugging Face Hub](https://huggingface.com/models) you can use.
+🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets and then share them with the community on our [model hub](https://huggingface.co/models). At the same time, each python module defining an architecture is fully standalone and can be modified to enable quick research experiments.

-Explore the [Hub](https://huggingface.com/) today to find a model and use Transformers to help you get started right away.
+🤗 Transformers is backed by the three most popular deep learning libraries — [Jax](https://jax.readthedocs.io/en/latest/), [PyTorch](https://pytorch.org/) and [TensorFlow](https://www.tensorflow.org/) — with a seamless integration between them. It's straightforward to train your models with one before loading them for inference with the other.

-## Installation
+## Online demos

-Transformers works with Python 3.9+ [PyTorch](https://pytorch.org/get-started/locally/) 2.1+, [TensorFlow](https://www.tensorflow.org/install/pip) 2.6+, and [Flax](https://flax.readthedocs.io/en/latest/) 0.4.1+.
+You can test most of our models directly on their pages from the [model hub](https://huggingface.co/models). We also offer [private model hosting, versioning, & an inference API](https://huggingface.co/pricing) for public and private models.

-Create and activate a virtual environment with [venv](https://docs.python.org/3/library/venv.html) or [uv](https://docs.astral.sh/uv/), a fast Rust-based Python package and project manager.
+Here are a few examples:

-```py
-# venv
-python -m venv .my-env
-source .my-env/bin/activate
-# uv
-uv venv .my-env
-source .my-env/bin/activate
+In Natural Language Processing:
+- [Masked word completion with BERT](https://huggingface.co/google-bert/bert-base-uncased?text=Paris+is+the+%5BMASK%5D+of+France)
+- [Named Entity Recognition with Electra](https://huggingface.co/dbmdz/electra-large-discriminator-finetuned-conll03-english?text=My+name+is+Sarah+and+I+live+in+London+city)
+- [Text generation with Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
+- [Natural Language Inference with RoBERTa](https://huggingface.co/FacebookAI/roberta-large-mnli?text=The+dog+was+lost.+Nobody+lost+any+animal)
+- [Summarization with BART](https://huggingface.co/facebook/bart-large-cnn?text=The+tower+is+324+metres+%281%2C063+ft%29+tall%2C+about+the+same+height+as+an+81-storey+building%2C+and+the+tallest+structure+in+Paris.+Its+base+is+square%2C+measuring+125+metres+%28410+ft%29+on+each+side.+During+its+construction%2C+the+Eiffel+Tower+surpassed+the+Washington+Monument+to+become+the+tallest+man-made+structure+in+the+world%2C+a+title+it+held+for+41+years+until+the+Chrysler+Building+in+New+York+City+was+finished+in+1930.+It+was+the+first+structure+to+reach+a+height+of+300+metres.+Due+to+the+addition+of+a+broadcasting+aerial+at+the+top+of+the+tower+in+1957%2C+it+is+now+taller+than+the+Chrysler+Building+by+5.2+metres+%2817+ft%29.+Excluding+transmitters%2C+the+Eiffel+Tower+is+the+second+tallest+free-standing+structure+in+France+after+the+Millau+Viaduct)
+- [Question answering with DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased-distilled-squad?text=Which+name+is+also+used+to+describe+the+Amazon+rainforest+in+English%3F&context=The+Amazon+rainforest+%28Portuguese%3A+Floresta+Amaz%C3%B4nica+or+Amaz%C3%B4nia%3B+Spanish%3A+Selva+Amaz%C3%B3nica%2C+Amazon%C3%ADa+or+usually+Amazonia%3B+French%3A+For%C3%AAt+amazonienne%3B+Dutch%3A+Amazoneregenwoud%29%2C+also+known+in+English+as+Amazonia+or+the+Amazon+Jungle%2C+is+a+moist+broadleaf+forest+that+covers+most+of+the+Amazon+basin+of+South+America.+This+basin+encompasses+7%2C000%2C000+square+kilometres+%282%2C700%2C000+sq+mi%29%2C+of+which+5%2C500%2C000+square+kilometres+%282%2C100%2C000+sq+mi%29+are+covered+by+the+rainforest.+This+region+includes+territory+belonging+to+nine+nations.+The+majority+of+the+forest+is+contained+within+Brazil%2C+with+60%25+of+the+rainforest%2C+followed+by+Peru+with+13%25%2C+Colombia+with+10%25%2C+and+with+minor+amounts+in+Venezuela%2C+Ecuador%2C+Bolivia%2C+Guyana%2C+Suriname+and+French+Guiana.+States+or+departments+in+four+nations+contain+%22Amazonas%22+in+their+names.+The+Amazon+represents+over+half+of+the+planet%27s+remaining+rainforests%2C+and+comprises+the+largest+and+most+biodiverse+tract+of+tropical+rainforest+in+the+world%2C+with+an+estimated+390+billion+individual+trees+divided+into+16%2C000+species)
+- [Translation with T5](https://huggingface.co/google-t5/t5-base?text=My+name+is+Wolfgang+and+I+live+in+Berlin)
+
+In Computer Vision:
+- [Image classification with ViT](https://huggingface.co/google/vit-base-patch16-224)
+- [Object Detection with DETR](https://huggingface.co/facebook/detr-resnet-50)
+- [Semantic Segmentation with SegFormer](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512)
+- [Panoptic Segmentation with Mask2Former](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic)
+- [Depth Estimation with Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything)
+- [Video Classification with VideoMAE](https://huggingface.co/docs/transformers/model_doc/videomae)
+- [Universal Segmentation with OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_dinat_large)
+
+In Audio:
+- [Automatic Speech Recognition with Whisper](https://huggingface.co/openai/whisper-large-v3)
+- [Keyword Spotting with Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks)
+- [Audio Classification with Audio Spectrogram Transformer](https://huggingface.co/MIT/ast-finetuned-audioset-10-10-0.4593)
+
+In Multimodal tasks:
+- [Table Question Answering with TAPAS](https://huggingface.co/google/tapas-base-finetuned-wtq)
+- [Visual Question Answering with ViLT](https://huggingface.co/dandelin/vilt-b32-finetuned-vqa)
+- [Image captioning with LLaVa](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
+- [Zero-shot Image Classification with SigLIP](https://huggingface.co/google/siglip-so400m-patch14-384)
+- [Document Question Answering with LayoutLM](https://huggingface.co/impira/layoutlm-document-qa)
+- [Zero-shot Video Classification with X-CLIP](https://huggingface.co/docs/transformers/model_doc/xclip)
+- [Zero-shot Object Detection with OWLv2](https://huggingface.co/docs/transformers/en/model_doc/owlv2)
+- [Zero-shot Image Segmentation with CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)
+- [Automatic Mask Generation with SAM](https://huggingface.co/docs/transformers/model_doc/sam)
+
+
+## 100 projects using Transformers
+
+Transformers is more than a toolkit to use pretrained models: it's a community of projects built around it and the
+Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone
+else to build their dream projects.
+
+In order to celebrate the 100,000 stars of transformers, we have decided to put the spotlight on the
+community, and we have created the [awesome-transformers](./awesome-transformers.md) page which lists 100
+incredible projects built in the vicinity of transformers.
+
+If you own or use a project that you believe should be part of the list, please open a PR to add it!
+
+## Serious about AI in your organisation? Build faster with the Hugging Face Enterprise Hub.
+
+<a target="_blank" href="https://huggingface.co/enterprise">
+    <img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925">
+</a><br>
+
+## Quick tour
+
+To immediately use a model on a given input (text, image, audio, ...), we provide the `pipeline` API. Pipelines group together a pretrained model with the preprocessing that was used during that model's training. Here is how to quickly use a pipeline to classify positive versus negative texts:
+
+```python
+>>> from transformers import pipeline
+
+# Allocate a pipeline for sentiment-analysis
+>>> classifier = pipeline('sentiment-analysis')
+>>> classifier('We are very happy to introduce pipeline to the transformers repository.')
+[{'label': 'POSITIVE', 'score': 0.9996980428695679}]
 ```

-Install Transformers in your virtual environment.
+The second line of code downloads and caches the pretrained model used by the pipeline, while the third evaluates it on the given text. Here, the answer is "positive" with a confidence of 99.97%.

-```py
-# pip
-pip install "transformers[torch]"
+Many tasks have a pre-trained `pipeline` ready to go, in NLP but also in computer vision and speech. For example, we can easily extract detected objects in an image:

-# uv
-uv pip install "transformers[torch]"
+```python
+>>> import requests
+>>> from PIL import Image
+>>> from transformers import pipeline
+
+# Download an image with cute cats
+>>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png"
+>>> image_data = requests.get(url, stream=True).raw
+>>> image = Image.open(image_data)
+
+# Allocate a pipeline for object detection
+>>> object_detector = pipeline('object-detection')
+>>> object_detector(image)
+[{'score': 0.9982201457023621,
+  'label': 'remote',
+  'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},
+ {'score': 0.9960021376609802,
+  'label': 'remote',
+  'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},
+ {'score': 0.9954745173454285,
+  'label': 'couch',
+  'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},
+ {'score': 0.9988006353378296,
+  'label': 'cat',
+  'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},
+ {'score': 0.9986783862113953,
+  'label': 'cat',
+  'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]
 ```

-Install Transformers from source if you want the latest changes in the library or are interested in contributing. However, the *latest* version may not be stable. Feel free to open an [issue](https://github.com/huggingface/transformers/issues) if you encounter an error.
-
-```shell
-git clone https://github.com/huggingface/transformers.git
-cd transformers
-
-# pip
-pip install .[torch]
-
-# uv
-uv pip install .[torch]
-```
-
-## Quickstart
-
-Get started with Transformers right away with the [Pipeline](https://huggingface.co/docs/transformers/pipeline_tutorial) API. The `Pipeline` is a high-level inference class that supports text, audio, vision, and multimodal tasks. It handles preprocessing the input and returns the appropriate output.
-
-Instantiate a pipeline and specify model to use for text generation. The model is downloaded and cached so you can easily reuse it again. Finally, pass some text to prompt the model.
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="text-generation", model="Qwen/Qwen2.5-1.5B")
-pipeline("the secret to baking a really good cake is ")
-[{'generated_text': 'the secret to baking a really good cake is 1) to use the right ingredients and 2) to follow the recipe exactly. the recipe for the cake is as follows: 1 cup of sugar, 1 cup of flour, 1 cup of milk, 1 cup of butter, 1 cup of eggs, 1 cup of chocolate chips. if you want to make 2 cakes, how much sugar do you need? To make 2 cakes, you will need 2 cups of sugar.'}]
-```
-
-To chat with a model, the usage pattern is the same. The only difference is you need to construct a chat history (the input to `Pipeline`) between you and the system.
-
-> [!TIP]
-> You can also chat with a model directly from the command line.
-> ```shell
-> transformers chat Qwen/Qwen2.5-0.5B-Instruct
-> ```
-
-```py
-import torch
-from transformers import pipeline
-
-chat = [
-    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
-    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
-]
-
-pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
-response = pipeline(chat, max_new_tokens=512)
-print(response[0]["generated_text"][-1]["content"])
-```
-
-Expand the examples below to see how `Pipeline` works for different modalities and tasks.
-
-<details>
-<summary>Automatic speech recognition</summary>
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-large-v3")
-pipeline("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac")
-{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}
-```
-
-</details>
-
-<details>
-<summary>Image classification</summary>
+Here, we get a list of objects detected in the image, with a box surrounding the object and a confidence score. Here is the original image on the left, with the predictions displayed on the right:

 <h3 align="center">
-    <a><img src="https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"></a>
+    <a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png" width="400"></a>
+    <a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample_post_processed.png" width="400"></a>
 </h3>

-```py
-from transformers import pipeline
+You can learn more about the tasks supported by the `pipeline` API in [this tutorial](https://huggingface.co/docs/transformers/task_summary).

-pipeline = pipeline(task="image-classification", model="facebook/dinov2-small-imagenet1k-1-layer")
-pipeline("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
-[{'label': 'macaw', 'score': 0.997848391532898},
- {'label': 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
-  'score': 0.0016551691805943847},
- {'label': 'lorikeet', 'score': 0.00018523589824326336},
- {'label': 'African grey, African gray, Psittacus erithacus',
-  'score': 7.85409429227002e-05},
- {'label': 'quail', 'score': 5.502637941390276e-05}]
+In addition to `pipeline`, to download and use any of the pretrained models on your given task, all it takes is three lines of code. Here is the PyTorch version:
+```python
+>>> from transformers import AutoTokenizer, AutoModel
+
+>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
+>>> model = AutoModel.from_pretrained("google-bert/bert-base-uncased")
+
+>>> inputs = tokenizer("Hello world!", return_tensors="pt")
+>>> outputs = model(**inputs)
 ```

-</details>
+And here is the equivalent code for TensorFlow:
+```python
+>>> from transformers import AutoTokenizer, TFAutoModel

-<details>
-<summary>Visual question answering</summary>
+>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
+>>> model = TFAutoModel.from_pretrained("google-bert/bert-base-uncased")

-
-<h3 align="center">
-    <a><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg"></a>
-</h3>
-
-```py
-from transformers import pipeline
-
-pipeline = pipeline(task="visual-question-answering", model="Salesforce/blip-vqa-base")
-pipeline(
-    image="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/idefics-few-shot.jpg",
-    question="What is in the image?",
-)
-[{'answer': 'statue of liberty'}]
+>>> inputs = tokenizer("Hello world!", return_tensors="tf")
+>>> outputs = model(**inputs)
 ```

-</details>
+The tokenizer is responsible for all the preprocessing the pretrained model expects and can be called directly on a single string (as in the above examples) or a list. It will output a dictionary that you can use in downstream code or simply directly pass to your model using the `**` argument unpacking operator.

-## Why should I use Transformers?
+The model itself is a regular [PyTorch `nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [TensorFlow `tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) (depending on your backend), which you can use as usual. [This tutorial](https://huggingface.co/docs/transformers/training) explains how to integrate such a model into a classic PyTorch or TensorFlow training loop, or how to use our `Trainer` API to quickly fine-tune on a new dataset.
+
+## Why should I use transformers?

 1. Easy-to-use state-of-the-art models:
-    - High performance on natural language understanding & generation, computer vision, audio, video, and multimodal tasks.
-    - Low barrier to entry for researchers, engineers, and developers.
+    - High performance on natural language understanding & generation, computer vision, and audio tasks.
+    - Low barrier to entry for educators and practitioners.
    - Few user-facing abstractions with just three classes to learn.
    - A unified API for using all our pretrained models.

 1. Lower compute costs, smaller carbon footprint:
-    - Share trained models instead of training from scratch.
-    - Reduce compute time and production costs.
-    - Dozens of model architectures with 1M+ pretrained checkpoints across all modalities.
+    - Researchers can share trained models instead of always retraining.
+    - Practitioners can reduce compute time and production costs.
+    - Dozens of architectures with over 400,000 pretrained models across all modalities.

-1. Choose the right framework for every part of a models lifetime:
+1. Choose the right framework for every part of a model's lifetime:
    - Train state-of-the-art models in 3 lines of code.
-    - Move a single model between PyTorch/JAX/TF2.0 frameworks at will.
-    - Pick the right framework for training, evaluation, and production.
+    - Move a single model between TF2.0/PyTorch/JAX frameworks at will.
+    - Seamlessly pick the right framework for training, evaluation, and production.

 1. Easily customize a model or an example to your needs:
    - We provide examples for each architecture to reproduce the results published by its original authors.
    - Model internals are exposed as consistently as possible.
    - Model files can be used independently of the library for quick experiments.

-<a target="_blank" href="https://huggingface.co/enterprise">
-    <img alt="Hugging Face Enterprise Hub" src="https://github.com/user-attachments/assets/247fb16d-d251-4583-96c4-d3d76dda4925">
-</a><br>
-
-## Why shouldn't I use Transformers?
+## Why shouldn't I use transformers?

 - This library is not a modular toolbox of building blocks for neural nets. The code in the model files is not refactored with additional abstractions on purpose, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files.
- The training API is optimized to work with PyTorch models provided by Transformers. For generic machine learning loops, you should use another library like [Accelerate](https://huggingface.co/docs/accelerate).
- The [example scripts](https://github.com/huggingface/transformers/tree/main/examples) are only *examples*. They may not necessarily work out-of-the-box on your specific use case and you'll need to adapt the code for it to work.
+- The training API is not intended to work on any model but is optimized to work with the models provided by the library. For generic machine learning loops, you should use another library (possibly, [Accelerate](https://huggingface.co/docs/accelerate)).
+- While we strive to present as many use cases as possible, the scripts in our [examples folder](https://github.com/huggingface/transformers/tree/main/examples) are just that: examples. It is expected that they won't work out-of-the-box on your specific problem and that you will be required to change a few lines of code to adapt them to your needs.

-## 100 projects using Transformers
+## Installation

-Transformers is more than a toolkit to use pretrained models, it's a community of projects built around it and the
-Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone
-else to build their dream projects.
+### With pip

-In order to celebrate Transformers 100,000 stars, we wanted to put the spotlight on the
-community with the [awesome-transformers](./awesome-transformers.md) page which lists 100
-incredible projects built with Transformers.
+This repository is tested on Python 3.9+, Flax 0.4.1+, PyTorch 2.0+, and TensorFlow 2.6+.

-If you own or use a project that you believe should be part of the list, please open a PR to add it!
+You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).

-## Example models
+First, create a virtual environment with the version of Python you're going to use and activate it.

-You can test most of our models directly on their [Hub model pages](https://huggingface.co/models).
+**macOS/Linux**

-Expand each modality below to see a few example models for various use cases.
+```shell
+python -m venv env
+source env/bin/activate
+```

-<details>
-<summary>Audio</summary>
+**Windows**

- Audio classification with [Whisper](https://huggingface.co/openai/whisper-large-v3-turbo)
- Automatic speech recognition with [Moonshine](https://huggingface.co/UsefulSensors/moonshine)
- Keyword spotting with [Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks)
- Speech to speech generation with [Moshi](https://huggingface.co/kyutai/moshiko-pytorch-bf16)
- Text to audio with [MusicGen](https://huggingface.co/facebook/musicgen-large)
- Text to speech with [Bark](https://huggingface.co/suno/bark)
+```shell
+python -m venv env
+env\Scripts\activate
+```

-</details>
+To use 🤗 Transformers, you must install at least one of Flax, PyTorch, or TensorFlow. Refer to the official installation guides for platform-specific commands:

-<details>
-<summary>Computer vision</summary>
+[TensorFlow installation page](https://www.tensorflow.org/install/), 
+[PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally) and/or [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) 

- Automatic mask generation with [SAM](https://huggingface.co/facebook/sam-vit-base)
- Depth estimation with [DepthPro](https://huggingface.co/apple/DepthPro-hf)
- Image classification with [DINO v2](https://huggingface.co/facebook/dinov2-base)
- Keypoint detection with [SuperPoint](https://huggingface.co/magic-leap-community/superpoint)
- Keypoint matching with [SuperGlue](https://huggingface.co/magic-leap-community/superglue_outdoor)
- Object detection with [RT-DETRv2](https://huggingface.co/PekingU/rtdetr_v2_r50vd)
- Pose Estimation with [VitPose](https://huggingface.co/usyd-community/vitpose-base-simple)
- Universal segmentation with [OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_swin_large)
- Video classification with [VideoMAE](https://huggingface.co/MCG-NJU/videomae-large)
+When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows:

-</details>
+```
+pip install transformers
+```

-<details>
-<summary>Multimodal</summary>
+If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source).

- Audio or text to text with [Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B)
- Document question answering with [LayoutLMv3](https://huggingface.co/microsoft/layoutlmv3-base)
- Image or text to text with [Qwen-VL](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct)
- Image captioning [BLIP-2](https://huggingface.co/Salesforce/blip2-opt-2.7b)
- OCR-based document understanding with [GOT-OCR2](https://huggingface.co/stepfun-ai/GOT-OCR-2.0-hf)
- Table question answering with [TAPAS](https://huggingface.co/google/tapas-base)
- Unified multimodal understanding and generation with [Emu3](https://huggingface.co/BAAI/Emu3-Gen)
- Vision to text with [Llava-OneVision](https://huggingface.co/llava-hf/llava-onevision-qwen2-0.5b-ov-hf)
- Visual question answering with [Llava](https://huggingface.co/llava-hf/llava-1.5-7b-hf)
- Visual referring expression segmentation with [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224)
+```
+git clone https://github.com/huggingface/transformers.git
+cd transformers
+pip install .
+```

-</details>
+### With conda

-<details>
-<summary>NLP</summary>
+🤗 Transformers can be installed using conda as follows:

- Masked word completion with [ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base)
- Named entity recognition with [Gemma](https://huggingface.co/google/gemma-2-2b)
- Question answering with [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)
- Summarization with [BART](https://huggingface.co/facebook/bart-large-cnn)
- Translation with [T5](https://huggingface.co/google-t5/t5-base)
- Text generation with [Llama](https://huggingface.co/meta-llama/Llama-3.2-1B)
- Text classification with [Qwen](https://huggingface.co/Qwen/Qwen2.5-0.5B)
+```shell script
+conda install conda-forge::transformers
+```

-</details>
+> **_NOTE:_** Installing `transformers` from the `huggingface` channel is deprecated.
+
+Follow the installation pages of Flax, PyTorch or TensorFlow to see how to install them with conda.
+
+> **_NOTE:_**  On Windows, you may be prompted to activate Developer Mode in order to benefit from caching. If this is not an option for you, please let us know in [this issue](https://github.com/huggingface/huggingface_hub/issues/1062).
+
+## Model architectures
+
+**[All the model checkpoints](https://huggingface.co/models)** provided by 🤗 Transformers are seamlessly integrated from the huggingface.co [model hub](https://huggingface.co/models), where they are uploaded directly by [users](https://huggingface.co/users) and [organizations](https://huggingface.co/organizations).
+
+Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen)
+
+🤗 Transformers currently provides the following architectures: see [here](https://huggingface.co/docs/transformers/model_summary) for a high-level summary of each of them.
+
+To check if each model has an implementation in Flax, PyTorch or TensorFlow, or has an associated tokenizer backed by the 🤗 Tokenizers library, refer to [this table](https://huggingface.co/docs/transformers/index#supported-frameworks).
+
+These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations. You can find more details on performance in the Examples section of the [documentation](https://github.com/huggingface/transformers/tree/main/examples).
+
+
+## Learn more
+
+| Section | Description |
+|-|-|
+| [Documentation](https://huggingface.co/docs/transformers/) | Full API documentation and tutorials |
+| [Task summary](https://huggingface.co/docs/transformers/task_summary) | Tasks supported by 🤗 Transformers |
+| [Preprocessing tutorial](https://huggingface.co/docs/transformers/preprocessing) | Using the `Tokenizer` class to prepare data for the models |
+| [Training and fine-tuning](https://huggingface.co/docs/transformers/training) | Using the models provided by 🤗 Transformers in a PyTorch/TensorFlow training loop and the `Trainer` API |
+| [Quick tour: Fine-tuning/usage scripts](https://github.com/huggingface/transformers/tree/main/examples) | Example scripts for fine-tuning models on a wide range of tasks |
+| [Model sharing and uploading](https://huggingface.co/docs/transformers/model_sharing) | Upload and share your fine-tuned models with the community |

 ## Citation

--- a/SECURITY.md
+++ b/SECURITY.md
@ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re
 models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
 by the transformers library), as developed specifically to prevent arbitrary code execution on your system.

-To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
+To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.

 ### Remote code

@ -27,6 +27,13 @@ These models require the `trust_remote_code=True` parameter to be set when using
 the content of the modeling files when using this argument. We recommend setting a revision in order to ensure you
 protect yourself from updates on the repository.

+#### Tools
+
+Through the `Agent` framework, remote tools can be downloaded to be used by the Agent. You're to specify these tools
+yourself, but please keep in mind that their code will be run on your machine if the Agent chooses to run them.
+
+Please inspect the code of the tools before passing them to the Agent to protect your runtime and local setup.
+
 ## Reporting a Vulnerability

 Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
--- a/awesome-transformers.md
+++ b/awesome-transformers.md
@ -15,7 +15,7 @@ to add it.

 Keywords: Open-source, LLaMa, GPT-J, instruction, assistant

-## [recommenders](https://github.com/recommenders-team/recommenders)
+## [recommenders](https://github.com/microsoft/recommenders)

 This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. It goes over several aspects required to build efficient recommendation systems: data preparation, modeling, evaluation, model selection & optimization, as well as operationalization

@ -29,7 +29,7 @@ Keywords: inpainting, SD, Stable Diffusion

 ## [flair](https://github.com/flairNLP/flair)

-FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things.
+FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things.

 Keywords: NLP, text embedding, document embedding, biomedical, NER, PoS, sentiment-analysis

@ -39,15 +39,15 @@ MindsDB is a low-code ML platform, which automates and integrates several ML fra

 Keywords: Database, low-code, AI table

-## [langchain](https://github.com/langchain-ai/langchain)
+## [langchain](https://github.com/hwchase17/langchain)

-[langchain](https://github.com/langchain-ai/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
+[langchain](https://github.com/hwchase17/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.

 Keywords: LLMs, Large Language Models, Agents, Chains

-## [LlamaIndex](https://github.com/run-llama/llama_index)
+## [LlamaIndex](https://github.com/jerryjliu/llama_index)

-[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLM's with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
+[LlamaIndex](https://github.com/jerryjliu/llama_index) is a project that provides a central interface to connect your LLMs with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.

 Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation 

@ -146,9 +146,9 @@ Keywords: Framework, simplicity, NLP

 Keywords: LLM, Agents, HF Hub

-## [transformers.js](https://github.com/huggingface/transformers.js/)
+## [transformers.js](https://xenova.github.io/transformers.js/)

-[transformers.js](https://github.com/huggingface/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
+[transformers.js](https://xenova.github.io/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.

 Keywords: Transformers, JavaScript, browser

@ -288,7 +288,7 @@ Keywords: Music understanding, Music generation

 ## [dalle-flow](https://github.com/jina-ai/dalle-flow)

-DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
+DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
 The preferred candidate is fed to GLID-3 XL for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via SwinIR.

 Keywords: High-definition image generation, Stable Diffusion, DALL-E Mega, GLID-3 XL, CLIP, SwinIR
@ -437,7 +437,7 @@ Keywords: DALL-E, Russian

 Keywords: Knowledge Extraction, Knowledge Graphs

-## [Nebuly](https://github.com/nebuly-ai/optimate)
+## [Nebuly](https://github.com/nebuly-ai/nebuly)

 Nebuly is the next-generation platform to monitor and optimize your AI costs in one place. The platform connects to all your AI cost sources (compute, API providers, AI software licenses, etc) and centralizes them in one place to give you full visibility on a model basis. The platform also provides optimization recommendations and a co-pilot model that can guide during the optimization process. The platform builds on top of the open-source tools allowing you to optimize the different steps of your AI stack to squeeze out the best possible cost performances.

@ -526,7 +526,7 @@ Keywords: Model deployment, CLoud, Mobile, Edge

 ## [underthesea](https://github.com/undertheseanlp/underthesea)

-[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
+[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.

 Keywords: Vietnamese, NLP

--- a/benchmark/.gitignore
+++ b/benchmark/.gitignore
@ -1 +0,0 @@
-benchmark_results/
--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -12,7 +12,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

 ## Writing metrics to the database

-`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
+`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.

 cf [`llama.py`](./llama.py) to see an example of this in practice.

--- a/benchmark/benches/llama.py
+++ b/benchmark/benches/llama.py
@ -1,345 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from logging import Logger
-import os
-from threading import Event, Thread
-from time import perf_counter, sleep
-from typing import Optional
-import sys
-
-# Add the parent directory to Python path to import benchmarks_entrypoint
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from benchmarks_entrypoint import MetricsRecorder
-
-import gpustat
-import psutil
-import psycopg2
-
-# Optional heavy ML dependencies - only required when actually running the benchmark
-try:
-    import torch
-    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
-    TRANSFORMERS_AVAILABLE = True
-except ImportError:
-    TRANSFORMERS_AVAILABLE = False
-    torch = None
-    AutoModelForCausalLM = None
-    AutoTokenizer = None
-    GenerationConfig = None
-    StaticCache = None
-
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-os.environ["TOKENIZERS_PARALLELISM"] = "1"
-
-# Only set torch precision if torch is available
-if TRANSFORMERS_AVAILABLE:
-    torch.set_float32_matmul_precision("high")
-
-
-def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
-    p = psutil.Process(os.getpid())
-    while not continue_metric_collection.is_set():
-        with p.oneshot():
-            cpu_util = p.cpu_percent()
-            mem_megabytes = p.memory_info().rss / (1024 * 1024)
-        gpu_stats = gpustat.GPUStatCollection.new_query()
-        gpu_util = gpu_stats[0]["utilization.gpu"]
-        gpu_mem_megabytes = gpu_stats[0]["memory.used"]
-        metrics_recorder.collect_device_measurements(
-            benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
-        )
-        sleep(0.01)
-
-
-def run_benchmark(
-    logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, metrics_recorder=None, num_tokens_to_generate=100
-):
-    # Check if required ML dependencies are available
-    if not TRANSFORMERS_AVAILABLE:
-        logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
-        logger.error("pip install torch transformers")
-        logger.error("Skipping LLaMA benchmark due to missing dependencies.")
-        return
-    
-    continue_metric_collection = Event()
-    metrics_thread = None
-    model_id = "meta-llama/Llama-2-7b-hf"
-    
-    # If no metrics_recorder is provided, create one for backward compatibility
-    if metrics_recorder is None:
-        try:
-            metrics_recorder = MetricsRecorder(
-                psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
-            )
-            should_close_recorder = True
-        except Exception as e:
-            logger.error(f"Failed to create metrics recorder: {e}")
-            return
-    else:
-        should_close_recorder = False
-    try:
-        gpu_stats = gpustat.GPUStatCollection.new_query()
-        gpu_name = gpu_stats[0]["name"]
-        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
-        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
-        metrics_thread = Thread(
-            target=collect_metrics,
-            args=[benchmark_id, continue_metric_collection, metrics_recorder],
-        )
-        metrics_thread.start()
-        logger.info("started background thread to fetch device metrics")
-
-        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling
-
-        device = "cuda"
-
-        logger.info("downloading weights")
-        # This is to avoid counting download in model load time measurement
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
-        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
-        logger.info("loading model")
-        start = perf_counter()
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id, torch_dtype=torch.float16, generation_config=gen_config
-        ).eval()
-        model.to(device)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        model_load_time = end - start
-        logger.info(f"loaded model in: {model_load_time}s")
-
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-        prompt = "Why dogs are so cute?"
-        inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-        # Specify the max length (including both the prompt and the response)
-        # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
-        # with sequence length = `max_length`. The longer the more you will re-use it
-        seq_length = inputs["input_ids"].shape[1]
-        model.generation_config.max_length = seq_length + num_tokens_to_generate
-        batch_size = inputs["input_ids"].shape[0]
-
-        # Copied from the gpt-fast repo
-        def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization
-            q = torch.empty_like(probs_sort).exponential_(1)
-            return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
-
-        def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
-            logits = logits / max(temperature, 1e-5)
-
-            if top_k is not None:
-                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
-                pivot = v.select(-1, -1).unsqueeze(-1)
-                logits = torch.where(logits < pivot, -float("Inf"), logits)
-            probs = torch.nn.functional.softmax(logits, dim=-1)
-            return probs
-
-        def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
-            probs = logits_to_probs(logits[0, -1], temperature, top_k)
-            idx_next = multinomial_sample_one_no_sync(probs)
-            return idx_next, probs
-
-        # First eager forward pass
-        logger.info("running first eager forward pass")
-        start = perf_counter()
-        outputs = model(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        first_eager_fwd_pass_time = end - start
-        logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s")
-
-        # Second eager forward pass (should be faster)
-        logger.info("running second eager forward pass")
-        start = perf_counter()
-        outputs = model(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        second_eager_fwd_pass_time = end - start
-        logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s")
-
-        # First eager generation
-        logger.info("running first eager generation")
-        start = perf_counter()
-        output = model.generate(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        first_eager_generate_time = end - start
-        logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        # Second eager generation (should be faster)
-        logger.info("running second eager generation")
-        start = perf_counter()
-        output = model.generate(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        second_eager_generate_time = end - start
-        logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        logger.info("running generation timing loop")
-
-        input_pos = torch.arange(0, seq_length, device=device)
-        inputs = inputs["input_ids"]
-
-        start = perf_counter()
-        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-            logits = model(inputs, position_ids=input_pos).logits
-        next_token, probs = sample(logits, temperature=0.6, top_k=5)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        time_to_first_token = end - start
-
-        input_pos = torch.tensor([seq_length], device=device, dtype=torch.int)
-        next_token = next_token.clone()
-        start = perf_counter()
-        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-            logits = model(next_token, position_ids=input_pos).logits
-        next_token, probs = sample(logits, temperature=0.6, top_k=5)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        time_to_second_token = end - start
-
-        input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int)
-        next_token = next_token.clone()
-        start = perf_counter()
-        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-            logits = model(next_token, position_ids=input_pos).logits
-        next_token, probs = sample(logits, temperature=0.6, top_k=5)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        time_to_third_token = end - start
-
-        logger.info("running longer generation timing loop")
-
-        total_time = 0
-        for i in range(20):
-            input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int)
-            next_token = next_token.clone()
-            start = perf_counter()
-            with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-                logits = model(next_token, position_ids=input_pos).logits
-            next_token, probs = sample(logits, temperature=0.6, top_k=5)
-            torch.cuda.synchronize()
-            end = perf_counter()
-            total_time += end - start
-
-        mean_time_to_next_token = total_time / 20
-
-        logger.info("running compilation benchmarks")
-
-        # Now compile the model
-        model = torch.compile(model, mode="max-autotune", fullgraph=True)
-
-        # StaticCache for generation
-        with torch.device(device):
-            model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate)
-
-        input_pos = torch.arange(0, seq_length, device=device)
-        inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"]
-
-        logger.info("compiling model")
-
-        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, generation_config=gen_config)
-        model.to(device)
-        model = torch.compile(model, mode="max-autotune", fullgraph=True)
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 1st call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        first_compile_generate_time = end - start
-        logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 2nd call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        second_compile_generate_time = end - start
-        logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 3rd call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        third_compile_generate_time = end - start
-        logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 4th call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        fourth_compile_generate_time = end - start
-        logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        metrics_recorder.collect_model_measurements(
-            benchmark_id,
-            {
-                "model_load_time": model_load_time,
-                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-                "first_eager_generate_time_secs": first_eager_generate_time,
-                "second_eager_generate_time_secs": second_eager_generate_time,
-                "time_to_first_token_secs": time_to_first_token,
-                "time_to_second_token_secs": time_to_second_token,
-                "time_to_third_token_secs": time_to_third_token,
-                "time_to_next_token_mean_secs": mean_time_to_next_token,
-                "first_compile_generate_time_secs": first_compile_generate_time,
-                "second_compile_generate_time_secs": second_compile_generate_time,
-                "third_compile_generate_time_secs": third_compile_generate_time,
-                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-            },
-        )
-    except Exception as e:
-        logger.error(f"Caught exception: {e}")
-    continue_metric_collection.set()
-    if metrics_thread is not None:
-        metrics_thread.join()
-    
-    # Only close the recorder if we created it locally
-    if should_close_recorder:
-        metrics_recorder.close() 
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@ -90,7 +90,7 @@ def summarize(run_dir, metrics, expand_metrics=False):

        model = benchmark.config.backend["model"]

-        # This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
+        # This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
        # (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
        benchmark_name = re.sub(f"backend.model={model},*", "", report_dir)
        benchmark_name = str(Path(benchmark_name).parts[-1])
--- a/benchmark/benchmarks_entrypoint.py
+++ b/benchmark/benchmarks_entrypoint.py
@ -1,35 +1,16 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import argparse
 import importlib.util
 import logging
 import os
+from typing import Dict
+import psycopg2
 import sys
-import json
-import uuid
-from datetime import datetime
-from typing import Dict, Tuple, Optional, List

-import pandas as pd
+from psycopg2.extras import Json
+from psycopg2.extensions import register_adapter

-try:
-    from psycopg2.extensions import register_adapter
-    from psycopg2.extras import Json
-    register_adapter(dict, Json)
-    PSYCOPG2_AVAILABLE = True
-except ImportError:
-    PSYCOPG2_AVAILABLE = False
+
+register_adapter(dict, Json)


 class ImportModuleException(Exception):
@ -37,240 +18,59 @@ class ImportModuleException(Exception):


 class MetricsRecorder:
-    def __init__(
-        self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str, 
-        collect_csv_data: bool = True
-    ):
+    def __init__(self, connection, logger: logging.Logger, branch: str, commit_id: str, commit_msg: str):
        self.conn = connection
-        self.use_database = connection is not None
-        if self.use_database:
-            self.conn.autocommit = True
+        self.conn.autocommit = True
        self.logger = logger
-        self.repository = repository
        self.branch = branch
        self.commit_id = commit_id
        self.commit_msg = commit_msg
-        self.collect_csv_data = collect_csv_data
-        
-        # For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
-        if self.collect_csv_data:
-            # Initialize empty DataFrames with proper schemas
-            self.benchmarks_df = pd.DataFrame(columns=[
-                'benchmark_id', 'repository', 'branch', 'commit_id', 'commit_message', 
-                'metadata', 'created_at'
-            ])
-            self.device_measurements_df = pd.DataFrame(columns=[
-                'benchmark_id', 'cpu_util', 'mem_megabytes', 'gpu_util', 
-                'gpu_mem_megabytes', 'time'
-            ])
-            self.model_measurements_df = pd.DataFrame(columns=[
-                'benchmark_id', 'time', 'model_load_time', 'first_eager_forward_pass_time_secs',
-                'second_eager_forward_pass_time_secs', 'first_eager_generate_time_secs',
-                'second_eager_generate_time_secs', 'time_to_first_token_secs',
-                'time_to_second_token_secs', 'time_to_third_token_secs',
-                'time_to_next_token_mean_secs', 'first_compile_generate_time_secs',
-                'second_compile_generate_time_secs', 'third_compile_generate_time_secs',
-                'fourth_compile_generate_time_secs'
-            ])
-        else:
-            self.benchmarks_df = None
-            self.device_measurements_df = None
-            self.model_measurements_df = None

-    def initialise_benchmark(self, metadata: dict[str, str]) -> str:
+    def initialise_benchmark(self, metadata: Dict[str, str]) -> int:
        """
-        Creates a new benchmark, returns the benchmark id (UUID)
+        Creates a new benchmark, returns the benchmark id
        """
-        # Generate a unique UUID for this benchmark
-        benchmark_id = str(uuid.uuid4())
-        
-        if self.use_database:
-            with self.conn.cursor() as cur:
-                cur.execute(
-                    "INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)",
-                    (benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
-                )
-                self.logger.debug(f"initialised benchmark #{benchmark_id}")
-        
-        # Store benchmark data for CSV export (if enabled)
-        if self.collect_csv_data:
-            # Add row to pandas DataFrame
-            new_row = pd.DataFrame([{
-                'benchmark_id': benchmark_id,
-                'repository': self.repository,
-                'branch': self.branch,
-                'commit_id': self.commit_id,
-                'commit_message': self.commit_msg,
-                'metadata': json.dumps(metadata),
-                'created_at': datetime.utcnow().isoformat()
-            }])
-            self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
-            
-        mode_info = []
-        if self.use_database:
-            mode_info.append("database")
-        if self.collect_csv_data:
-            mode_info.append("CSV")
-        mode_str = " + ".join(mode_info) if mode_info else "no storage"
-        
-        self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
-        return benchmark_id
+        # `metadata` is passed straight through as a query parameter for the `metadata` column;
+        # dicts are adapted for psycopg2 by the module-level `register_adapter(dict, Json)` call.
+        with self.conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO benchmarks (branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s) RETURNING benchmark_id",
+                (self.branch, self.commit_id, self.commit_msg, metadata),
+            )
+            # `RETURNING benchmark_id` yields a single row whose first column is the new id.
+            benchmark_id = cur.fetchone()[0]
+        # Log through the logger injected into the recorder (as the other methods do) rather than
+        # the module-level `logger`, which ignores the logger supplied by the caller.
+        self.logger.debug(f"initialised benchmark #{benchmark_id}")
+        return benchmark_id

-    def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
+    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
        """
        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
        """
-        # Store device measurements for CSV export (if enabled)
-        if self.collect_csv_data:
-            # Add row to pandas DataFrame
-            new_row = pd.DataFrame([{
-                'benchmark_id': benchmark_id,
-                'cpu_util': cpu_util,
-                'mem_megabytes': mem_megabytes,
-                'gpu_util': gpu_util,
-                'gpu_mem_megabytes': gpu_mem_megabytes,
-                'time': datetime.utcnow().isoformat()
-            }])
-            self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
-        
-        # Store in database if available
-        if self.use_database:
-            with self.conn.cursor() as cur:
-                cur.execute(
-                    "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
-                    (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
-                )
-            
+        with self.conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
+                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
+            )
        self.logger.debug(
-            f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
+            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
        )

-    def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]):
-        # Store model measurements for CSV export (if enabled)
-        if self.collect_csv_data:
-            # Add row to pandas DataFrame with flattened measurements
-            row_data = {
-                'benchmark_id': benchmark_id,
-                'time': datetime.utcnow().isoformat()
-            }
-            # Flatten the measurements dict into the row
-            row_data.update(measurements)
-            
-            new_row = pd.DataFrame([row_data])
-            self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
-        
-        # Store in database if available
-        if self.use_database:
-            with self.conn.cursor() as cur:
-                cur.execute(
-                    """
-                    INSERT INTO model_measurements (
-                        benchmark_id,
-                        measurements
-                    ) VALUES (%s, %s)
-                    """,
-                    (
-                        benchmark_id,
-                        measurements,
-                    ),
-                )
-            
-        self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
-
-    def export_to_csv(self, output_dir: str = "benchmark_results"):
-        """
-        Export all collected data to CSV files using pandas DataFrames
-        """
-        if not self.collect_csv_data:
-            self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
-            return
-            
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-            self.logger.info(f"Created output directory: {output_dir}")
-            
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        files_created = []
-        
-        # Export using pandas DataFrames
-        self._export_pandas_data(output_dir, timestamp, files_created)
-        
-        self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
-    
-    def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
-        """
-        Export CSV files using pandas DataFrames
-        """
-        # Export benchmarks
-        benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv")
-        self.benchmarks_df.to_csv(benchmarks_file, index=False)
-        files_created.append(benchmarks_file)
-        self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
-        
-        # Export device measurements  
-        device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
-        self.device_measurements_df.to_csv(device_file, index=False)
-        files_created.append(device_file)
-        self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
-        
-        # Export model measurements (already flattened)
-        model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
-        self.model_measurements_df.to_csv(model_file, index=False)
-        files_created.append(model_file)
-        self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
-        
-        # Create comprehensive summary using pandas operations
-        summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
-        self._create_summary(summary_file)
-        files_created.append(summary_file)
-    
-    def _create_summary(self, summary_file: str):
-        """
-        Create a comprehensive summary CSV using pandas operations
-        """
-        if len(self.benchmarks_df) == 0:
-            # Create empty summary file
-            summary_df = pd.DataFrame()
-            summary_df.to_csv(summary_file, index=False)
-            self.logger.info(f"Created empty benchmark summary at {summary_file}")
-            return
-        
-        # Start with benchmarks as the base
-        summary_df = self.benchmarks_df.copy()
-        
-        # Add model measurements (join on benchmark_id)
-        if len(self.model_measurements_df) > 0:
-            # Drop 'time' column from model measurements to avoid conflicts
-            model_df = self.model_measurements_df.drop(columns=['time'], errors='ignore')
-            summary_df = summary_df.merge(model_df, on='benchmark_id', how='left')
-        
-        # Calculate device measurement aggregates using pandas groupby
-        if len(self.device_measurements_df) > 0:
-            device_agg = self.device_measurements_df.groupby('benchmark_id').agg({
-                'cpu_util': ['mean', 'max', 'std', 'count'],
-                'mem_megabytes': ['mean', 'max', 'std'],
-                'gpu_util': ['mean', 'max', 'std'],
-                'gpu_mem_megabytes': ['mean', 'max', 'std']
-            }).round(3)
-            
-            # Flatten column names
-            device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
-            device_agg = device_agg.reset_index()
-            
-            # Rename count column to be more descriptive
-            if 'cpu_util_count' in device_agg.columns:
-                device_agg = device_agg.rename(columns={'cpu_util_count': 'device_measurement_count'})
-            
-            # Merge with summary
-            summary_df = summary_df.merge(device_agg, on='benchmark_id', how='left')
-        
-        # Export the comprehensive summary
-        summary_df.to_csv(summary_file, index=False)
-        self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
+    def collect_model_measurements(self, benchmark_id: int, measurements: Dict[str, float]):
+        with self.conn.cursor() as cur:
+            cur.execute(
+                """
+                INSERT INTO model_measurements (
+                    benchmark_id,
+                    measurements
+                ) VALUES (%s, %s)
+                """,
+                (
+                    benchmark_id,
+                    measurements,
+                ),
+            )
+        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")

    def close(self):
-        if self.use_database and self.conn:
-            self.conn.close()
+        self.conn.close()


 logger = logging.getLogger(__name__)
@ -283,18 +83,12 @@ handler.setFormatter(formatter)
 logger.addHandler(handler)


-def parse_arguments() -> tuple[str, str, str, str, bool, str]:
+def parse_arguments():
    """
    Parse command line arguments for the benchmarking CLI.
    """
    parser = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")

-    parser.add_argument(
-        "repository",
-        type=str,
-        help="The repository name on which the benchmarking is performed.",
-    )
-
    parser.add_argument(
        "branch",
        type=str,
@ -312,27 +106,10 @@ def parse_arguments() -> tuple[str, str, str, str, bool, str]:
        type=str,
        help="The commit message associated with the commit, truncated to 70 characters.",
    )
-    
-    parser.add_argument(
-        "--csv",
-        action="store_true",
-        default=False,
-        help="Enable CSV output files generation."
-    )
-    
-    parser.add_argument(
-        "--csv-output-dir",
-        type=str,
-        default="benchmark_results",
-        help="Directory for CSV output files (default: benchmark_results)."
-    )

    args = parser.parse_args()
-    
-    # CSV is disabled by default, only enabled when --csv is used
-    generate_csv = args.csv

-    return args.repository, args.branch, args.commit_id, args.commit_msg, generate_csv, args.csv_output_dir
+    return args.branch, args.commit_id, args.commit_msg


 def import_from_path(module_name, file_path):
@ -346,124 +123,22 @@ def import_from_path(module_name, file_path):
        raise ImportModuleException(f"failed to load python module: {e}")


-def create_database_connection():
-    """
-    Try to create a database connection. Returns None if connection fails.
-    """
-    if not PSYCOPG2_AVAILABLE:
-        logger.warning("psycopg2 not available - running in CSV-only mode")
-        return None
-        
-    try:
-        import psycopg2
-        conn = psycopg2.connect("dbname=metrics")
-        logger.info("Successfully connected to database")
-        return conn
-    except Exception as e:
-        logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode")
-        return None
-
-
-def create_global_metrics_recorder(repository: str, branch: str, commit_id: str, commit_msg: str, 
-                                   generate_csv: bool = False) -> MetricsRecorder:
-    """
-    Create a global metrics recorder that will be used across all benchmarks.
-    """
-    connection = create_database_connection()
-    recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)
-    
-    # Log the storage mode
-    storage_modes = []
-    if connection is not None:
-        storage_modes.append("database")
-    if generate_csv:
-        storage_modes.append("CSV")
-    
-    if not storage_modes:
-        logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
-        logger.warning("Use --csv flag to enable CSV output when database is unavailable")
-    else:
-        logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
-    
-    return recorder
-
-
 if __name__ == "__main__":
    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
-    benches_folder_path = os.path.join(benchmarks_folder_path, "benches")

-    repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()
-    
-    # Create a global metrics recorder
-    global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)
-    
-    successful_benchmarks = 0
-    failed_benchmarks = 0
-    
-    # Automatically discover all benchmark modules in benches/ folder
-    benchmark_modules = []
-    
-    if os.path.exists(benches_folder_path):
-        logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
-        for entry in os.scandir(benches_folder_path):
+    branch, commit_id, commit_msg = parse_arguments()
+
+    # `os.scandir` yields paths under the realpath-resolved `benchmarks_folder_path`, so resolve
+    # this script's own path the same way; the raw `__file__` may be relative or go through a
+    # symlink, in which case the entrypoint would fail to skip itself.
+    entrypoint_path = os.path.realpath(__file__)
+
+    for entry in os.scandir(benchmarks_folder_path):
+        try:
            if not entry.name.endswith(".py"):
                continue
-            if entry.name.startswith("__"):  # Skip __init__.py, __pycache__, etc.
+            if entry.path == entrypoint_path:
                continue
-                
-            # Check if the file has a run_benchmark function
-            try:
-                logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
-                module = import_from_path(entry.name.split(".")[0], entry.path)
-                if hasattr(module, 'run_benchmark'):
-                    benchmark_modules.append(entry.name)
-                    logger.debug(f"discovered benchmark: {entry.name}")
-                else:
-                    logger.debug(f"skipping {entry.name} - no run_benchmark function found")
-            except Exception as e:
-                logger.debug(f"failed to check benches/{entry.name}: {e}")
-    else:
-        logger.warning(f"Benches directory not found: {benches_folder_path}")
-
-    if benchmark_modules:
-        logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}")
-    else:
-        logger.warning("No benchmark modules found in benches/ directory")
-
-    for module_name in benchmark_modules:
-        module_path = os.path.join(benches_folder_path, module_name)
-        try:
-            logger.debug(f"loading: {module_name}")
-            module = import_from_path(module_name.split(".")[0], module_path)
-            logger.info(f"running benchmarks in: {module_name}")
-            
-            # Check if the module has an updated run_benchmark function that accepts metrics_recorder
-            try:
-                # Try the new signature first
-                module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
-            except TypeError:
-                # Fall back to the old signature for backward compatibility
-                logger.warning(f"Module {module_name} using old run_benchmark signature - database connection will be created per module")
-                module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
-            
-            successful_benchmarks += 1
+            logger.debug(f"loading: {entry.name}")
+            module = import_from_path(entry.name.split(".")[0], entry.path)
+            logger.info(f"runnning benchmarks in: {entry.name}")
+            module.run_benchmark(logger, branch, commit_id, commit_msg)
        except ImportModuleException as e:
            logger.error(e)
-            failed_benchmarks += 1
        except Exception as e:
-            logger.error(f"error running benchmarks for {module_name}: {e}")
-            failed_benchmarks += 1
-
-    # Export CSV results at the end (if enabled)
-    try:
-        if generate_csv:
-            global_metrics_recorder.export_to_csv(csv_output_dir)
-            logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
-        else:
-            logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
-        
-        logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
-    except Exception as e:
-        logger.error(f"Failed to export CSV results: {e}")
-    finally:
-        global_metrics_recorder.close()
+            logger.error(f"error running benchmarks for {entry.name}: {e}")
--- a/benchmark/init_db.sql
+++ b/benchmark/init_db.sql
@ -0,0 +1,33 @@
+-- Schema for the benchmark metrics database: one `benchmarks` row per run, with device and
+-- model measurements referencing it through `benchmark_id`.
+
+-- One row per benchmark run. `benchmark_id` is generated by the database (SERIAL) and
+-- `created_at` defaults to the current UTC time.
+CREATE TABLE IF NOT EXISTS benchmarks (
+  benchmark_id SERIAL PRIMARY KEY,
+  branch VARCHAR(255),
+  commit_id VARCHAR(72),
+  commit_message VARCHAR(70),
+  metadata jsonb,
+  created_at timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
+);
+
+-- NOTE(review): benchmark_id is already the primary key, so this extra index is presumably
+-- redundant - confirm before removing.
+CREATE INDEX IF NOT EXISTS benchmarks_benchmark_id_idx ON benchmarks (benchmark_id);
+
+CREATE INDEX IF NOT EXISTS benchmarks_branch_idx ON benchmarks (branch);
+
+-- Device utilisation samples (CPU, memory, GPU) attached to a benchmark run.
+CREATE TABLE IF NOT EXISTS device_measurements (
+  measurement_id SERIAL PRIMARY KEY,
+  benchmark_id int REFERENCES benchmarks (benchmark_id),
+  cpu_util double precision,
+  mem_megabytes double precision,
+  gpu_util double precision,
+  gpu_mem_megabytes double precision,
+  time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
+);
+
+-- NOTE(review): despite the `_branch_idx` suffix, this index covers benchmark_id.
+CREATE INDEX IF NOT EXISTS device_measurements_branch_idx ON device_measurements (benchmark_id);
+
+-- Model-level measurements attached to a benchmark run, stored as one jsonb document per row.
+CREATE TABLE IF NOT EXISTS model_measurements (
+  measurement_id SERIAL PRIMARY KEY,
+  benchmark_id int REFERENCES benchmarks (benchmark_id),
+  measurements jsonb,
+  time timestamp without time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
+);
+
+-- NOTE(review): despite the `_branch_idx` suffix, this index covers benchmark_id.
+CREATE INDEX IF NOT EXISTS model_measurements_branch_idx ON model_measurements (benchmark_id);
--- a/benchmark/llama.py
+++ b/benchmark/llama.py
@ -0,0 +1,342 @@
+from logging import Logger
+import os
+from threading import Event, Thread
+from time import perf_counter, sleep
+from typing import Optional
+from benchmarks_entrypoint import MetricsRecorder
+import gpustat
+import psutil
+import psycopg2
+import torch
+
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
+
+
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+os.environ["TOKENIZERS_PARALLELISM"] = "1"
+torch.set_float32_matmul_precision("high")
+
+
+def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
+    """
+    Sample device usage in a loop and hand every sample to `metrics_recorder`.
+
+    Each iteration reads this process's CPU percentage and resident memory (converted to
+    megabytes), plus the utilisation and used memory of the first GPU reported by gpustat,
+    then sleeps for 10 ms. The loop ends once `continue_metric_collection` is set.
+    """
+    bytes_per_megabyte = 1024 * 1024
+    process = psutil.Process(os.getpid())
+    while True:
+        # Despite its name, the event acts as the stop signal: sampling ends when it is set.
+        if continue_metric_collection.is_set():
+            break
+        with process.oneshot():
+            cpu_util = process.cpu_percent()
+            rss_bytes = process.memory_info().rss
+        mem_megabytes = rss_bytes / bytes_per_megabyte
+        first_gpu = gpustat.GPUStatCollection.new_query()[0]
+        metrics_recorder.collect_device_measurements(
+            benchmark_id,
+            cpu_util,
+            mem_megabytes,
+            first_gpu["utilization.gpu"],
+            first_gpu["memory.used"],
+        )
+        sleep(0.01)
+
+
+def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
+    continue_metric_collection = Event()
+    metrics_thread = None
+    model_id = "meta-llama/Llama-2-7b-hf"
+    metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
+    try:
+        gpu_stats = gpustat.GPUStatCollection.new_query()
+        gpu_name = gpu_stats[0]["name"]
+        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
+        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
+        metrics_thread = Thread(
+            target=collect_metrics,
+            args=[benchmark_id, continue_metric_collection, metrics_recorder],
+        )
+        metrics_thread.start()
+        logger.info("started background thread to fetch device metrics")
+
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling
+
+        device = "cuda"
+
+        logger.info("downloading weights")
+        # This is to avoid counting download in model load time measurement
+        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
+        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
+        logger.info("loading model")
+        start = perf_counter()
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, torch_dtype=torch.float16, generation_config=gen_config
+        ).eval()
+        model.to(device)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        model_load_time = end - start
+        logger.info(f"loaded model in: {model_load_time}s")
+
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+        prompt = "Why dogs are so cute?"
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+        # Specify the max length (including both the prompt and the response)
+        # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
+        # with sequence length = `max_length`. The longer the more you will re-use it
+        seq_length = inputs["input_ids"].shape[1]
+        model.generation_config.max_length = seq_length + num_tokens_to_generate
+        batch_size = inputs["input_ids"].shape[0]
+
+        # Copied from the gpt-fast repo
+        def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization
+            q = torch.empty_like(probs_sort).exponential_(1)
+            return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
+
+        def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
+            logits = logits / max(temperature, 1e-5)
+
+            if top_k is not None:
+                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                pivot = v.select(-1, -1).unsqueeze(-1)
+                logits = torch.where(logits < pivot, -float("Inf"), logits)
+            probs = torch.nn.functional.softmax(logits, dim=-1)
+            return probs
+
+        def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
+            probs = logits_to_probs(logits[:, -1], temperature, top_k)
+            idx_next = multinomial_sample_one_no_sync(probs)
+            return idx_next, probs
+
+        def decode_one_token(model, cur_token, cache_position, past_key_values):
+            logits = model(
+                cur_token,
+                cache_position=cache_position,
+                past_key_values=past_key_values,
+                return_dict=False,
+                use_cache=True,
+            )[0]
+            new_token = sample(logits, temperature=0.6, top_k=5)[0]
+            return new_token
+
+        #########
+        # Eager #
+        #########
+        with torch.no_grad():
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + num_tokens_to_generate,
+            )
+            cache_position = torch.arange(seq_length, device=device)
+            start = perf_counter()
+            model(
+                **inputs,
+                cache_position=cache_position,
+                past_key_values=past_key_values,
+                return_dict=False,
+                use_cache=True,
+            )
+            end = perf_counter()
+            first_eager_fwd_pass_time = end - start
+            logger.info(f"completed first eager fwd pass in: {first_eager_fwd_pass_time}s")
+            start = perf_counter()
+            output = model.generate(**inputs, do_sample=False)
+            end = perf_counter()
+            first_eager_generate_time = end - start
+            logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
+            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + num_tokens_to_generate,
+            )
+            cache_position = torch.arange(seq_length, device=device)
+            start = perf_counter()
+            model(
+                **inputs,
+                cache_position=cache_position,
+                past_key_values=past_key_values,
+                return_dict=False,
+                use_cache=True,
+            )
+            end = perf_counter()
+            second_eager_fwd_pass_time = end - start
+            logger.info(f"completed second eager fwd pass in: {second_eager_fwd_pass_time}s")
+            start = perf_counter()
+            model.generate(**inputs, do_sample=False)
+            end = perf_counter()
+            second_eager_generate_time = end - start
+            logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
+            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+            torch.compiler.reset()
+
+            ################
+            # Forward pass #
+            ################
+
+            # `torch.compile(model, ...)` is not recommended as you compile callbacks
+            # and full generate. We recommend compiling only the forward for now.
+            # "reduce-overhead" will use cudagraphs.
+            generated_ids = torch.zeros(
+                (batch_size, num_tokens_to_generate + seq_length), dtype=torch.int, device=device
+            )
+
+            generated_ids[:, :seq_length] = inputs["input_ids"]
+            decode_one_token = torch.compile(decode_one_token, mode="reduce-overhead", fullgraph=True)
+            # model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
+            # TODO use  decode_one_token(model, input_id.clone(), cache_position) for verification
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + num_tokens_to_generate + 10,
+            )
+            cache_position = torch.arange(seq_length, device=device)
+            all_generated_tokens = []
+            ### First compile, prefill
+            start = perf_counter()
+            next_token = decode_one_token(
+                model, inputs["input_ids"], cache_position=cache_position, past_key_values=past_key_values
+            )
+            torch.cuda.synchronize()
+            end = perf_counter()
+            time_to_first_token = end - start
+            logger.info(f"completed first compile generation in: {time_to_first_token}s")
+            cache_position += 1
+            all_generated_tokens += next_token.clone().detach().cpu().tolist()
+
+            cache_position = torch.tensor([seq_length], device=device)
+            ### First compile, decoding
+            start = perf_counter()
+            next_token = decode_one_token(
+                model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
+            )
+            torch.cuda.synchronize()
+            end = perf_counter()
+            time_to_second_token = end - start
+            logger.info(f"completed second compile generation in: {time_to_first_token}s")
+            cache_position += 1
+            all_generated_tokens += next_token.clone().detach().cpu().tolist()
+
+            ### Second compile, decoding
+            start = perf_counter()
+            next_token = decode_one_token(
+                model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
+            )
+            torch.cuda.synchronize()
+            end = perf_counter()
+            time_to_third_token = end - start
+            logger.info(f"completed third compile forward in: {time_to_first_token}s")
+            cache_position += 1
+            all_generated_tokens += next_token.clone().detach().cpu().tolist()
+
+            ### Using cuda graphs decoding
+
+            start = perf_counter()
+            for _ in range(1, num_tokens_to_generate):
+                all_generated_tokens += next_token.clone().detach().cpu().tolist()
+                next_token = decode_one_token(
+                    model, next_token.clone(), cache_position=cache_position, past_key_values=past_key_values
+                )
+                cache_position += 1
+            torch.cuda.synchronize()
+            end = perf_counter()
+            mean_time_to_next_token = (end - start) / num_tokens_to_generate
+            logger.info(f"completed next compile generation in: {mean_time_to_next_token}s")
+            logger.info(f"generated: {tokenizer.batch_decode(all_generated_tokens)}")
+
+            ####################
+            # Generate compile #
+            ####################
+            torch.compiler.reset()
+            # we will not compile the full generate as it's too intensive, though we still measure the full forward!
+
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + 128,
+            )
+
+            # 1st call
+            start = perf_counter()
+            output = model.generate(**inputs, past_key_values=past_key_values)
+            torch.cuda.synchronize()
+            end = perf_counter()
+            first_compile_generate_time = end - start
+            logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
+            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + 128,
+            )
+            # 2nd call
+            start = perf_counter()
+            output = model.generate(**inputs, past_key_values=past_key_values)
+            torch.cuda.synchronize()
+            end = perf_counter()
+            second_compile_generate_time = end - start
+            logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
+            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + 128,
+            )
+
+            # 3rd call
+            start = perf_counter()
+            output = model.generate(**inputs, past_key_values=past_key_values)
+            end = perf_counter()
+            third_compile_generate_time = end - start
+            logger.info(f"completed second compile generation in: {third_compile_generate_time}s")
+            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+            past_key_values = StaticCache(
+                model.config,
+                batch_size=batch_size,
+                device=device,
+                dtype=torch.float16,
+                max_cache_len=seq_length + 128,
+            )
+            # 4th call
+            start = perf_counter()
+            output = model.generate(**inputs, past_key_values=past_key_values)
+            end = perf_counter()
+            fourth_compile_generate_time = end - start
+            logger.info(f"completed second compile generation in: {fourth_compile_generate_time}s")
+            logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+
+        metrics_recorder.collect_model_measurements(
+            benchmark_id,
+            {
+                "model_load_time": model_load_time,
+                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
+                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
+                "first_eager_generate_time_secs": first_eager_generate_time,
+                "second_eager_generate_time_secs": second_eager_generate_time,
+                "time_to_first_token_secs": time_to_first_token,
+                "time_to_second_token_secs": time_to_second_token,
+                "time_to_third_token_secs": time_to_third_token,
+                "time_to_next_token_mean_secs": mean_time_to_next_token,
+                "first_compile_generate_time_secs": first_compile_generate_time,
+                "second_compile_generate_time_secs": second_compile_generate_time,
+                "third_compile_generate_time_secs": third_compile_generate_time,
+                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
+            },
+        )
+    except Exception as e:
+        logger.error(f"Caught exception: {e}")
+    continue_metric_collection.set()
+    if metrics_thread is not None:
+        metrics_thread.join()
+    metrics_recorder.close()
--- a/benchmark/requirements.txt
+++ b/benchmark/requirements.txt
@ -2,5 +2,4 @@ gpustat==1.1.1
 psutil==6.0.0
 psycopg2==2.9.9
 torch>=2.4.0
-hf_transfer
-pandas>=1.5.0
+hf_transfer
--- a/benchmark/utils/init_db.sql
+++ b/benchmark/utils/init_db.sql
--- a/conftest.py
+++ b/conftest.py
@ -23,12 +23,12 @@ from os.path import abspath, dirname, join
 import _pytest
 import pytest

-from transformers.testing_utils import HfDoctestModule, HfDocTestParser, is_torch_available
+from transformers.testing_utils import HfDoctestModule, HfDocTestParser


 NOT_DEVICE_TESTS = {
    "test_tokenization",
-    "test_tokenization_mistral_common",
+    "test_processor",
    "test_processing",
    "test_beam_constraints",
    "test_configuration_utils",
@ -46,6 +46,10 @@ NOT_DEVICE_TESTS = {
    "test_keep_in_fp32_modules",
    "test_gradient_checkpointing_backward_compatibility",
    "test_gradient_checkpointing_enable_disable",
+    "test_save_load_fast_init_from_base",
+    "test_fast_init_context_manager",
+    "test_fast_init_tied_embeddings",
+    "test_save_load_fast_init_to_base",
    "test_torch_save_load",
    "test_initialization",
    "test_forward_signature",
@ -57,6 +61,7 @@ NOT_DEVICE_TESTS = {
    "test_load_save_without_tied_weights",
    "test_tied_weights_keys",
    "test_model_weights_reload_no_missing_tied_weights",
+    "test_pt_tf_model_equivalence",
    "test_mismatched_shapes_have_properly_initialized_weights",
    "test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
    "test_model_is_small",
@ -66,6 +71,7 @@ NOT_DEVICE_TESTS = {
    "ModelTester::test_pipeline_",
    "/repo_utils/",
    "/utils/",
+    "/agents/",
 }

 # allow having multiple repository checkouts and not needing to remember to rerun
@ -79,12 +85,17 @@ warnings.simplefilter(action="ignore", category=FutureWarning)


 def pytest_configure(config):
+    config.addinivalue_line(
+        "markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested"
+    )
+    config.addinivalue_line(
+        "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
+    )
    config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
    config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
    config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
+    config.addinivalue_line("markers", "agent_tests: mark the agent tests that are run on their specific schedule")
    config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")
-    config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality")
-    config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality")


 def pytest_collection_modifyitems(items):
@ -129,10 +140,3 @@ class CustomOutputChecker(OutputChecker):
 doctest.OutputChecker = CustomOutputChecker
 _pytest.doctest.DoctestModule = HfDoctestModule
 doctest.DocTestParser = HfDocTestParser
-
-if is_torch_available():
-    import torch
-
-    # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True.
-    # We set it to `False` for CI. See https://github.com/pytorch/pytorch/issues/157274#issuecomment-3090791615
-    torch.backends.cudnn.allow_tf32 = False
--- a/docker/README.md
+++ b/docker/README.md
@ -2,8 +2,8 @@

 In this folder you will find various docker files, and some subfolders. 
 - dockerfiles (ex: `consistency.dockerfile`) present under `~/docker` are used for our "fast" CIs. You should be able to use them for tasks that only need CPU. For example `torch-light` is a very light weights container (703MiB). 
- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs)
+- subfolders contain dockerfiles used for our `slow` CIs, which *can* be used for GPU tasks, but they are **BIG** as they were not specifically designed for a single model / single task. Thus the `~/docker/transformers-pytorch-gpu` includes additional dependencies to allow us to run ALL model tests (say `librosa` or `tesseract`, which you do not need to run LLMs)

 Note that in both case, you need to run `uv pip install -e .`, which should take around 5 seconds. We do it outside the dockerfile for the need of our CI: we checkout a new branch each time, and the `transformers` code is thus updated. 

-We are open to contribution, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs: 
+We are open to contribution, and invite the community to create dockerfiles with potential arguments that properly choose extras depending on the model's dependencies! :hugs: 
--- a/docker/consistency.dockerfile
+++ b/docker/consistency.dockerfile
@ -1,16 +1,16 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 USER root
 ARG REF=main
 RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip install uv && uv pip install --no-cache-dir -U pip setuptools GitPython
-RUN uv pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
+RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
+RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
 # tensorflow pin matching setup.py
 RUN uv pip install --no-cache-dir pypi-kenlm
 RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
 RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
 RUN git lfs install

-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
+RUN pip uninstall -y transformers
+RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/custom-tokenizers.dockerfile
+++ b/docker/custom-tokenizers.dockerfile
@ -1,10 +1,9 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake wget xz-utils build-essential g++5 libprotobuf-dev protobuf-compiler
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools

 RUN wget https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz
 RUN tar xvf jumanpp-2.0.0-rc3.tar.xz
@ -17,11 +16,11 @@ RUN make install -j 10


 RUN uv pip install --no-cache --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
+RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
+RUN uv pip install  --no-cache-dir "transformers[ja,testing,sentencepiece,jieba,spacy,ftfy,rjieba]" unidic unidic-lite
 # spacy is not used so not tested. Causes to failures. TODO fix later
-RUN uv run python -m unidic download
-RUN uv pip uninstall transformers
+RUN python3 -m unidic download
+RUN pip uninstall -y transformers

 RUN apt-get clean && rm -rf /var/lib/apt/lists/*
-RUN apt remove -y g++ cmake  xz-utils libprotobuf-dev protobuf-compiler
+RUN apt remove -y g++ cmake  xz-utils libprotobuf-dev protobuf-compiler
--- a/docker/examples-tf.dockerfile
+++ b/docker/examples-tf.dockerfile
@ -1,13 +1,12 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git
 RUN apt-get install -y g++ cmake
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv
+RUN pip --no-cache-dir install uv && uv venv
 RUN uv pip install --no-cache-dir -U pip setuptools albumentations seqeval
-RUN uv pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3"
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN pip install  --upgrade --no-cache-dir "transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
+RUN uv pip install --no-cache-dir  "protobuf==3.20.3" 
+RUN pip uninstall -y transformers
+RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/examples-torch.dockerfile
+++ b/docker/examples-torch.dockerfile
@ -1,12 +1,11 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
-ARG REF=main
 USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git ffmpeg
+RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
+RUN uv pip install --no-cache-dir librosa "transformers[sklearn,sentencepiece,vision,testing]" seqeval albumentations jiwer
+RUN pip uninstall -y transformers
+RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/exotic-models.dockerfile
+++ b/docker/exotic-models.dockerfile
@ -1,17 +1,17 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git libgl1-mesa-glx libgl1 g++ tesseract-ocr
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
+RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-cache-dir  --no-deps timm accelerate
-RUN uv pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
+RUN pip install -U --upgrade-strategy eager --no-cache-dir pytesseract python-Levenshtein opencv-python nltk
 # RUN uv pip install --no-cache-dir natten==0.15.1+torch210cpu -f https://shi-labs.com/natten/wheels
-RUN uv pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose'  'dataset'
+RUN pip install  --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[testing, vision]" 'scikit-learn' 'torch-stft' 'nose'  'dataset'
 # RUN git clone https://github.com/facebookresearch/detectron2.git
 # RUN python3 -m pip install --no-cache-dir -e detectron2
-RUN uv pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3' --no-build-isolation
-RUN uv pip uninstall transformers
+RUN pip install 'git+https://github.com/facebookresearch/detectron2.git@92ae9f0b92aba5867824b4f12aa06a22a60a45d3'
+RUN pip uninstall -y transformers
 RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/jax-light.dockerfile
+++ b/docker/jax-light.dockerfile
@ -1,10 +1,10 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]"
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
+RUN pip --no-cache-dir install uv &&  uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,testing,sentencepiece,flax-speech,vision]"
+RUN pip uninstall -y transformers
+RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/pipeline-tf.dockerfile
+++ b/docker/pipeline-tf.dockerfile
@ -1,10 +1,10 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
 RUN apt-get update && apt-get install -y libsndfile1-dev espeak-ng time git cmake g++
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]"
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]"
 RUN uv pip install --no-cache-dir  "protobuf==3.20.3" tensorflow_probability
-RUN apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/pipeline-torch.dockerfile
+++ b/docker/pipeline-torch.dockerfile
@ -1,11 +1,11 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git ffmpeg
+RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git pkg-config openssh-client git
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
+RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
 RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]"
-RUN uv pip uninstall transformers
+RUN pip uninstall -y transformers
--- a/docker/quality.dockerfile
+++ b/docker/quality.dockerfile
@ -1,9 +1,9 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
-RUN apt-get update && apt-get install -y time git
+RUN apt-get update && apt-get install -y time git 
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip install uv
+RUN pip install uv &&  uv venv
 RUN uv pip install --no-cache-dir -U pip setuptools GitPython "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[ruff]" urllib3
-RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN apt-get install -y jq curl && apt-get clean && rm -rf /var/lib/apt/lists/*
--- a/docker/tf-light.dockerfile
+++ b/docker/tf-light.dockerfile
@ -1,12 +1,12 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
 RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ pkg-config openssh-client git
 RUN apt-get install -y  cmake
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
-RUN uv pip install --no-cache-dir  "protobuf==3.20.3"
-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install  --upgrade --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,testing,sentencepiece,tf-speech,vision]"
+RUN uv pip install --no-cache-dir  "protobuf==3.20.3" 
+RUN pip uninstall -y transformers
+RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/torch-jax-light.dockerfile
+++ b/docker/torch-jax-light.dockerfile
@ -1,16 +1,16 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
 RUN apt-get update &&  apt-get install -y libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
 RUN uv pip install --no-deps accelerate
-RUN uv pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]"
+RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir "scipy<1.13" "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,audio,sklearn,sentencepiece,vision,testing]"


 # RUN pip install --no-cache-dir "scipy<1.13" "transformers[flax,testing,sentencepiece,flax-speech,vision]"

-RUN uv pip uninstall transformers
+RUN pip uninstall -y transformers
 RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/torch-light.dockerfile
+++ b/docker/torch-light.dockerfile
@ -1,11 +1,11 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 USER root
-RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs ffmpeg
+RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' 'torchcodec' --index-url https://download.pytorch.org/whl/cpu
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
 RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu
-RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken,num2words,video]"
-RUN uv pip uninstall transformers
+RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]"
+RUN pip uninstall -y transformers
--- a/docker/torch-tf-light.dockerfile
+++ b/docker/torch-tf-light.dockerfile
@ -1,19 +1,19 @@
-FROM python:3.9-slim
+FROM python:3.10-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ARG REF=main
 RUN echo ${REF}
 USER root
 RUN apt-get update &&  apt-get install -y --no-install-recommends libsndfile1-dev espeak-ng time git g++ cmake pkg-config openssh-client git git-lfs
 ENV UV_PYTHON=/usr/local/bin/python
-RUN pip --no-cache-dir install uv && uv pip install --no-cache-dir -U pip setuptools
+RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools
 RUN uv pip install --no-cache-dir  --no-deps accelerate --extra-index-url https://download.pytorch.org/whl/cpu 
-RUN uv pip install --no-cache-dir 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
 RUN git lfs install

 RUN uv pip install --no-cache-dir pypi-kenlm
-RUN uv pip install --no-cache-dir  "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]"
+RUN pip install --no-cache-dir  "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[tf-cpu,sklearn,sentencepiece,vision,testing]"
 RUN uv pip install --no-cache-dir  "protobuf==3.20.3" librosa


-RUN uv pip uninstall transformers
-RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
+RUN pip uninstall -y transformers
+RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
+FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
@ -9,11 +9,11 @@ SHELL ["sh", "-lc"]
 # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
 # to be used as arguments for docker build (so far).

-ARG PYTORCH='2.8.0'
+ARG PYTORCH='2.5.1'
+# (not always a valid torch version)
+ARG INTEL_TORCH_EXT='2.3.0'
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu126'
-# Disable kernel mapping for now until all tests pass
-ENV DISABLE_KERNEL_MAPPING=1
+ARG CUDA='cu121'

 RUN apt update
 RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
@ -26,11 +26,11 @@ RUN git clone https://github.com/huggingface/transformers && cd transformers &&
 # 1. Put several commands in a single `RUN` to avoid image/layer exporting issue. Could be revised in the future.
 # 2. Regarding `torch` part, We might need to specify proper versions for `torchvision` and `torchaudio`.
 #    Currently, let's not bother to specify their versions explicitly (so installed with their latest release versions).
-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA && python3 -m pip uninstall -y tensorflow tensorflow_text tensorflow_probability
+RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 "tensorflow_text<2.16" "tensorflow_probability<0.22" && python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] && [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile && echo torch=$VERSION && [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA

 RUN python3 -m pip uninstall -y flax jax

-RUN python3 -m pip install --no-cache-dir -U timm
+RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT -f https://developer.intel.com/ipex-whl-stable-cpu

 RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
 RUN python3 -m pip install -U "itsdangerous<2.1.0"
@ -43,7 +43,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum

 # For video model testing
-RUN python3 -m pip install --no-cache-dir av
+RUN python3 -m pip install --no-cache-dir av==9.2.0

 # Some slow tests require bnb
 RUN python3 -m pip install --no-cache-dir bitsandbytes
@ -57,8 +57,7 @@ RUN python3 -m pip uninstall -y ninja

 # For `dinat` model
 # The `XXX` part in `torchXXX` needs to match `PYTORCH` (to some extent)
-# pin `0.17.4` otherwise `cannot import name 'natten2dav' from 'natten.functional'`
-RUN python3 -m pip install --no-cache-dir natten==0.17.4+torch250cu121 -f https://shi-labs.com/natten/wheels
+RUN python3 -m pip install --no-cache-dir natten==0.15.1+torch220$CUDA -f https://shi-labs.com/natten/wheels

 # For `nougat` tokenizer
 RUN python3 -m pip install --no-cache-dir python-Levenshtein
@ -69,12 +68,6 @@ RUN python3 -m pip install --no-cache-dir g2p-en
 # For Some bitsandbytes tests
 RUN python3 -m pip install --no-cache-dir einops

-# For Some tests with `@require_liger_kernel`
-RUN python3 -m pip install --no-cache-dir liger-kernel
-
-# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
-RUN python3 -m pip uninstall -y kernels
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
--- a/docker/transformers-past-gpu/Dockerfile
+++ b/docker/transformers-past-gpu/Dockerfile
@ -48,8 +48,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
 # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
 RUN python3 -m pip uninstall -y deepspeed
 # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
-# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
-# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
+# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
+# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
 #    DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

 RUN python3 -m pip install -U "itsdangerous<2.1.0"
--- a/docker/transformers-pytorch-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-amd-gpu/Dockerfile
@ -1,16 +1,19 @@
-FROM rocm/pytorch:rocm6.4.1_ubuntu24.04_py3.12_pytorch_release_2.7.1
+FROM rocm/dev-ubuntu-22.04:6.3
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive

 RUN apt update && \
-    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \
+    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*

-RUN git lfs install
+RUN export PATH="${PATH:+${PATH}:}~/opt/rocm/bin"

 RUN python3 -m pip install --no-cache-dir --upgrade pip numpy
+
+RUN python3 -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
+
 RUN python3 -m pip install --no-cache-dir --upgrade importlib-metadata setuptools ninja git+https://github.com/facebookresearch/detectron2.git pytesseract "itsdangerous<2.1.0"

 ARG REF=main
@ -20,12 +23,8 @@ WORKDIR /
 ADD https://api.github.com/repos/huggingface/transformers/git/refs/heads/main version.json
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

-# On ROCm, torchcodec is required to decode audio files
-# RUN python3 -m pip install --no-cache-dir torchcodec
-# Install transformers
-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video,audio]
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]

-# Remove tensorflow and flax as they are no longer supported by transformers
 RUN python3 -m pip uninstall -y tensorflow flax

 # When installing in editable mode, `transformers` is not recognized as a package.
@ -34,6 +33,3 @@ RUN cd transformers && python3 setup.py develop

 # Remove nvml and nvidia-ml-py as it is not compatible with ROCm. apex is not tested on NVIDIA either.
 RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
-
-# `kernels` may causes many failing tests
-RUN python3 -m pip uninstall -y kernels
--- a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile
@ -1,11 +1,11 @@
-FROM rocm/dev-ubuntu-22.04:6.2.4
+FROM rocm/dev-ubuntu-22.04:5.6
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
-ARG PYTORCH='2.6.0'
-ARG TORCH_VISION='0.21.0'
-ARG TORCH_AUDIO='2.6.0'
-ARG ROCM='6.2.4'
+ARG PYTORCH='2.1.1'
+ARG TORCH_VISION='0.16.1'
+ARG TORCH_AUDIO='2.1.1'
+ARG ROCM='5.6'

 RUN apt update && \
    apt install -y --no-install-recommends \
@ -16,11 +16,9 @@ RUN apt update && \
    python-is-python3 \
    rocrand-dev \
    rocthrust-dev \
-    rocblas-dev \
-    hipsolver-dev \
    hipsparse-dev \
    hipblas-dev \
-    hipblaslt-dev && \
+    rocblas-dev && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*

@ -47,7 +45,4 @@ RUN cd transformers && python3 setup.py develop
 RUN python3 -c "from deepspeed.launcher.runner import main"

 # Remove nvml as it is not compatible with ROCm
-RUN python3 -m pip uninstall py3nvml pynvml nvidia-ml-py apex -y
-
-# `kernels` may causes many failing tests
-RUN python3 -m pip uninstall -y kernels
+RUN python3 -m pip uninstall py3nvml pynvml -y
--- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile
@ -1,12 +1,12 @@
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
-FROM nvcr.io/nvidia/pytorch:24.08-py3
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-04.html#rel-23-04
+FROM nvcr.io/nvidia/pytorch:23.04-py3
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive

-ARG PYTORCH='2.8.0'
+ARG PYTORCH='2.2.0'
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu126'
+ARG CUDA='cu121'

 RUN apt -y update
 RUN apt install -y libaio-dev
@ -15,13 +15,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
 ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

-# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
-RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
+RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]

 # Install latest release PyTorch
 # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
 # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
-RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
+RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA

 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate

@ -45,9 +44,6 @@ RUN python3 -m pip uninstall -y deepspeed
 # TODO: Find out why test fail.
 RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

-# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
-RUN python3 -m pip uninstall -y kernels
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
--- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
+++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile
@ -1,11 +1,11 @@
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-23-11.html#rel-23-11
-FROM nvcr.io/nvidia/pytorch:24.08-py3
+FROM nvcr.io/nvidia/pytorch:23.11-py3
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive

 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu126'
+ARG CUDA='cu121'

 RUN apt -y update
 RUN apt install -y libaio-dev
@ -19,10 +19,9 @@ RUN python3 -m pip uninstall -y torch torchvision torchaudio
 # Install **nightly** release PyTorch (flag `--pre`)
 # (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
 # (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
-RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
+RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA

-# `datasets` requires pandas, pandas has some modules compiled with numpy=1.x causing errors
-RUN python3 -m pip install --no-cache-dir './transformers[deepspeed-testing]' 'pandas<2' 'numpy<2'
+RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]

 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate

@ -35,8 +34,8 @@ RUN python3 -m pip uninstall -y torch-tensorrt apex
 # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
 RUN python3 -m pip uninstall -y deepspeed
 # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.)
-# Issue: https://github.com/deepspeedai/DeepSpeed/issues/2010
-# RUN git clone https://github.com/deepspeedai/DeepSpeed && cd DeepSpeed && rm -rf build && \
+# Issue: https://github.com/microsoft/DeepSpeed/issues/2010
+# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \
 #    DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

 ## For `torchdynamo` tests
@ -57,9 +56,6 @@ RUN python3 -m pip uninstall -y deepspeed
 #RUN git clone https://github.com/pytorch/TensorRT.git
 #RUN cd TensorRT/py && python3 setup.py install --fx-only

-# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
-RUN python3 -m pip uninstall -y kernels
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
--- a/docker/transformers-pytorch-gpu/Dockerfile
+++ b/docker/transformers-pytorch-gpu/Dockerfile
@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04
+FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
@ -11,28 +11,23 @@ ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

 # If set to nothing, will install the latest version
-ARG PYTORCH='2.8.0'
+ARG PYTORCH='2.5.1'
 ARG TORCH_VISION=''
 ARG TORCH_AUDIO=''
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu126'
+ARG CUDA='cu121'

-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
-
-# Install torch stuff after ./transformers[dev-torch,testing,video], otherwise torch may be resolved to a previous
-# version.
 RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
 RUN [ ${#TORCH_VISION} -gt 0 ] && VERSION='torchvision=='TORCH_VISION'.*' ||  VERSION='torchvision'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA
 RUN [ ${#TORCH_AUDIO} -gt 0 ] && VERSION='torchaudio=='TORCH_AUDIO'.*' ||  VERSION='torchaudio'; python3 -m pip install --no-cache-dir -U $VERSION --extra-index-url https://download.pytorch.org/whl/$CUDA

+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch,testing,video]
+
 RUN python3 -m pip uninstall -y tensorflow flax

 RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
 RUN python3 -m pip install -U "itsdangerous<2.1.0"

-# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
-RUN python3 -m pip uninstall -y kernels
-
 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
 RUN cd transformers && python3 setup.py develop
--- a/docker/transformers-pytorch-xpu/Dockerfile
+++ b/docker/transformers-pytorch-xpu/Dockerfile
@ -1,93 +0,0 @@
-FROM intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04 AS base
-LABEL maintainer="Hugging Face"
-
-SHELL ["/bin/bash", "-c"]
-
-ARG PYTHON_VER=3.11
-ENV TORCH_DEVICE_BACKEND_AUTOLOAD=0
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get remove -y python3.10 && apt-get autoremove -y
-RUN apt-get update && \
-    apt-get install -y software-properties-common && \
-    add-apt-repository -y ppa:deadsnakes/ppa && \
-    apt-get update && \
-    apt-get install -y python$PYTHON_VER python$PYTHON_VER-dev python3-pip && \
-    ln -sf /usr/bin/python$PYTHON_VER /usr/bin/python3 && \
-    ln -sf /usr/bin/python3 /usr/bin/python && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-RUN apt-get update && \
-    apt-get -y install \
-        apt-utils \
-        build-essential \
-        ca-certificates \
-        clinfo \
-        curl \
-        git \
-        git-lfs \
-        vim \
-        numactl \
-        gnupg2 \
-        gpg-agent \
-        zlib1g-dev \
-        rsync \
-        sudo \
-        libnl-genl-3-200 \
-        xpu-smi \
-        unzip \
-        ffmpeg \
-        tesseract-ocr \
-        espeak-ng \
-        wget \
-        ncurses-term && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-
-RUN apt-get update && \
-    apt-get install -y \
-        linux-headers-$(uname -r) \
-        linux-modules-extra-$(uname -r) \
-        flex bison \
-        intel-fw-gpu intel-i915-dkms xpu-smi \
-        intel-opencl-icd libze-intel-gpu1 libze1 \
-        intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
-        libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
-        libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
-        mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc \
-        libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev && \
-    apt-get clean && \
-    rm -rf  /var/lib/apt/lists/*
-
-RUN pip install --upgrade pip
-RUN pip install triton==3.3.0
-
-RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
-
-RUN pip install evaluate torchdata pyctcdecode pytesseract decord galore-torch fire scipy scikit-learn sentencepiece sacremoses nltk rouge_score librosa soundfile g2p_en mpi4py requests_mock
-RUN pip install pretty_midi essentia resampy Levenshtein av sacrebleu phonemizer invisible_watermark schedulefree
-RUN pip install gguf hqq compressed_tensors gptqmodel mergekit autoawq deepspeed torchao onnx
-RUN pip install hf_transfer huggingface-hub hf-doc-builder datasets optimum-quanto timm transformers accelerate optimum peft
-
-RUN pip install git+https://github.com/linkedin/Liger-Kernel.git --extra-index-url https://download.pytorch.org/whl/test/xpu
-
-# install bitsandbytes
-RUN pip install git+https://github.com/bitsandbytes-foundation/bitsandbytes.git
-
-ENV OCL_ICD_VENDORS=/etc/OpenCL/vendors
-ENV FI_PROVIDER_PATH=${I_MPI_ROOT}/lib/libfabric/prov:/usr/lib/x86_64-linux-gnu/libfabric
-ENV CCL_ROOT=/usr/local
-ENV CCL_ATL_TRANSPORT=ofi
-ENV I_MPI_ROOT=/usr/local
-ENV CLASSPATH=${I_MPI_ROOT}/lib/mpi.jar
-ENV PATH=${I_MPI_ROOT}/bin/libfabric:${PATH}
-ENV LD_LIBRARY_PATH=${I_MPI_ROOT}/lib/libfabric:${LD_LIBRARY_PATH}
-
-RUN touch /entrypoint.sh
-RUN chmod +x /entrypoint.sh
-RUN echo "#!/bin/bash" >> /entrypoint.sh
-RUN echo "source /opt/intel/oneapi/setvars.sh --force && /bin/bash" >> /entrypoint.sh
-
-ENTRYPOINT ["/entrypoint.sh"]
--- a/docker/transformers-quantization-latest-gpu/Dockerfile
+++ b/docker/transformers-quantization-latest-gpu/Dockerfile
@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
 LABEL maintainer="Hugging Face"

 ARG DEBIAN_FRONTEND=noninteractive
@ -9,11 +9,9 @@ SHELL ["sh", "-lc"]
 # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
 # to be used as arguments for docker build (so far).

-ARG PYTORCH='2.6.0'
+ARG PYTORCH='2.5.1'
 # Example: `cu102`, `cu113`, etc.
-ARG CUDA='cu121'
-# Disable kernel mapping for quantization tests
-ENV DISABLE_KERNEL_MAPPING=1
+ARG CUDA='cu118'

 RUN apt update
 RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg
@ -26,7 +24,9 @@ RUN [ ${#PYTORCH} -gt 0 ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch';
 RUN echo torch=$VERSION
 # `torchvision` and `torchaudio` should be installed along with `torch`, especially for nightly build.
 # Currently, let's just use their latest releases (when `torch` is installed with a release version)
-RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio torchcodec --extra-index-url https://download.pytorch.org/whl/$CUDA
+RUN python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA
+
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]

 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate

@ -36,9 +36,10 @@ RUN python3 -m pip install --no-cache-dir einops
 # Add bitsandbytes for mixed int8 testing
 RUN python3 -m pip install --no-cache-dir bitsandbytes

-# Add gptqmodel for gtpq quantization testing, installed from source for pytorch==2.6.0 compatibility
-RUN python3 -m pip install lm_eval
-RUN git clone https://github.com/ModelCloud/GPTQModel.git && cd GPTQModel && pip install -v . --no-build-isolation
+# Add auto-gptq for gptq quantization testing, installed from source for pytorch==2.5.1 compatibility
+# TORCH_CUDA_ARCH_LIST="7.5+PTX" is added to make the package compile for Tesla T4 gpus available for the CI.
+RUN pip install gekko
+RUN git clone https://github.com/PanQiWei/AutoGPTQ.git && cd AutoGPTQ && TORCH_CUDA_ARCH_LIST="7.5+PTX" python3 setup.py install

 # Add optimum for gptq quantization testing
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
@ -50,11 +51,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/pef
 RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2

 # Add vptq for quantization testing
-RUN pip install vptq
-
-# Add spqr for quantization testing
-# Commented for now as No matching distribution found we need to reach out to the authors
-# RUN python3 -m pip install --no-cache-dir spqr_quant[gpu]
+RUN python3 -m pip install --no-cache-dir vptq

 # Add hqq for quantization testing
 RUN python3 -m pip install --no-cache-dir hqq
@ -63,42 +60,18 @@ RUN python3 -m pip install --no-cache-dir hqq
 RUN python3 -m pip install --no-cache-dir gguf

 # Add autoawq for quantization testing
-# New release v0.2.8
-RUN python3 -m pip install --no-cache-dir autoawq[kernels]
+# >=v0.2.7 needed for compatibility with transformers > 4.46
+RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.7.post2/autoawq-0.2.7.post2-py3-none-any.whl

 # Add quanto for quantization testing
 RUN python3 -m pip install --no-cache-dir optimum-quanto

 # Add eetq for quantization testing
-RUN git clone https://github.com/NetEase-FuXi/EETQ.git && cd EETQ/ && git submodule update --init --recursive && pip install .
+RUN python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git

-# # Add flute-kernel and fast_hadamard_transform for quantization testing
-# # Commented for now as they cause issues with the build
-# # TODO: create a new workflow to test them
-# RUN python3 -m pip install --no-cache-dir flute-kernel==0.4.1
-# RUN python3 -m pip install --no-cache-dir git+https://github.com/Dao-AILab/fast-hadamard-transform.git
-
-# Add fp-quant for quantization testing
-# Requires py3.11 but our CI runs on 3.9
-# RUN python3 -m pip install --no-cache-dir "fp-quant>=0.1.6"
-
-# Add compressed-tensors for quantization testing
-RUN python3 -m pip install --no-cache-dir compressed-tensors
-
-# Add AMD Quark for quantization testing
-RUN python3 -m pip install --no-cache-dir amd-quark
-
-# Add AutoRound for quantization testing
-RUN python3 -m pip install --no-cache-dir "auto-round>=0.5.0"
-
-# Add transformers in editable mode
-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev-torch]
-
-# `kernels` may give different outputs (within 1e-5 range) even with the same model (weights) and the same inputs
-RUN python3 -m pip uninstall -y kernels
-
-# Uninstall flash-attn installed by autoawq, it causes issues here : https://github.com/huggingface/transformers/actions/runs/15915442841/job/44892146131
-RUN python3 -m pip uninstall -y flash-attn
+# Add flute-kernel and fast_hadamard_transform for quantization testing
+RUN python3 -m pip install --no-cache-dir flute-kernel==0.3.0 -i https://flute-ai.github.io/whl/cu118
+RUN python3 -m pip install --no-cache-dir fast_hadamard_transform==1.0.4.post1

 # When installing in editable mode, `transformers` is not recognized as a package.
 # this line must be added in order for python to be aware of transformers.
--- a/docs/README.md
+++ b/docs/README.md
@ -20,21 +20,22 @@ To generate the documentation, you first have to build it. Several packages are
 you can install them with the following command, at the root of the code repository:

 ```bash
-pip install -e ".[dev]"
+pip install -e ".[docs]"
 ```

-> [!NOTE]
-> This command might fail for some OS that are missing dependencies. Check step 4 in [Create a Pull Request](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#create-a-pull-request) to workaround it.
-
 Then you need to install our special tool that builds the documentation:

 ```bash
 pip install git+https://github.com/huggingface/doc-builder
 ```

-> [!NOTE]
-> You only need to generate the documentation to inspect it locally (if you're planning changes and want to
-> check how they look before committing for instance). You don't have to commit the built documentation.
+---
+**NOTE**
+
+You only need to generate the documentation to inspect it locally (if you're planning changes and want to
+check how they look before committing for instance). You don't have to commit the built documentation.
+
+---

 ## Building the documentation

@ -71,8 +72,12 @@ doc-builder preview transformers docs/source/en/

 The docs will be viewable at [http://localhost:3000](http://localhost:3000). You can also preview the docs once you have opened a PR. You will see a bot add a comment to a link where the documentation with your changes lives.

-> [!NOTE]
-> The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again).
+---
+**NOTE**
+
+The `preview` command only works with existing doc files. When you add a completely new file, you need to update `_toctree.yml` & restart `preview` command (`ctrl-c` to stop it & call `doc-builder preview ...` again).
+
+---

 ## Adding a new element to the navigation bar

@ -159,9 +164,6 @@ These classes should be added using our Markdown syntax. Usually as follows:
 [[autodoc]] XXXConfig
 ```

-> [!IMPORTANT]
-> Always add a blank line after `[[autodoc]]` to ensure it passes the CI/CD checks.
-
 This will include every public method of the configuration that is documented. If for some reason you wish for a method
 not to be displayed in the documentation, you can do so by specifying which methods should be in the docs:

@ -276,7 +278,7 @@ Here's an example of a single value return:

 ```python
    Returns:
-        `list[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
+        `List[int]`: A list of integers in the range [0, 1] --- 1 for a special token, 0 for a sequence token.
 ```

 Here's an example of a tuple return, comprising several objects:
--- a/docs/source/ar/_toctree.yml
+++ b/docs/source/ar/_toctree.yml
@ -23,6 +23,8 @@
    title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT
  - local: model_sharing
    title: مشاركة نموذجك
+  - local: agents
+    title: الوكلاء
  - local: llm_tutorial
    title: التوليد باستخدام LLMs
  - local: conversations
@ -250,6 +252,8 @@
  title: أطر مفاهيمية
 # - sections:
 #   - sections:
+#     - local: main_classes/agent
+#       title: الوكلاء والأدوات
 #     - local: model_doc/auto
 #       title: فئات يتم إنشاؤها ديناميكيًا
 #     - local: main_classes/backbones
--- a/docs/source/ar/agents.md
+++ b/docs/source/ar/agents.md
@ -0,0 +1,539 @@
+# الوكلاء والأدوات
+
+[[open-in-colab]]
+
+### ما هو الوكيل؟
+
+يمكن للنظم اللغوية الكبيرة (LLMs) التي تم تدريبها على أداء [نمذجة اللغة السببية](./tasks/language_modeling) التعامل مع مجموعة واسعة من المهام، ولكنها غالبًا ما تواجه صعوبات في المهام الأساسية مثل المنطق والحساب والبحث. وعندما يتم استدعاؤها في مجالات لا تؤدي فيها أداءً جيدًا، فإنها غالبًا ما تفشل في توليد الإجابة التي نتوقعها منها.
+
+يتمثل أحد النهج للتغلب على هذا القصور في إنشاء "وكيل".
+
+الوكيل هو نظام يستخدم LLM كمحرك له، ولديه حق الوصول إلى وظائف تسمى "أدوات".
+
+هذه "الأدوات" هي وظائف لأداء مهمة، وتحتوي على جميع الأوصاف اللازمة للوكيل لاستخدامها بشكل صحيح.
+
+يمكن برمجة الوكيل للقيام بما يلي:
+- وضع سلسلة من الإجراءات/الأدوات وتشغيلها جميعًا في نفس الوقت مثل [`CodeAgent`] على سبيل المثال
+- التخطيط للإجراءات/الأدوات وتنفيذها واحدة تلو الأخرى والانتظار حتى انتهاء كل إجراء قبل إطلاق التالي مثل [`ReactJsonAgent`] على سبيل المثال
+
+### أنواع الوكلاء
+
+#### الوكيل البرمجي (Code agent)
+
+يتبع هذا الوكيل خطوات محددة: أولًا، يخطط لسلسلة من الإجراءات التي يريد تنفيذها، ثم يولّد شفرة Python لتنفيذ جميع الإجراءات في نفس الوقت. وهو يتعامل بشكل أصلي مع أنواع مختلفة من المدخلات والمخرجات للأدوات التي يستخدمها، وبالتالي فهو الخيار الموصى به للمهام متعددة الوسائط.
+
+#### وكلاء التفاعل
+
+هذا هو الوكيل الذي يتم اللجوء إليه لحل مهام الاستدلال، حيث يجعل إطار ReAct ([Yao et al.، 2022](https://huggingface.co/papers/2210.03629)) من الكفاءة حقًا التفكير على أساس ملاحظاته السابقة.
+
+نقوم بتنفيذ إصدارين من ReactJsonAgent: 
+- [`ReactJsonAgent`] يقوم بتوليد استدعاءات الأدوات كـ JSON في إخراجها.
+- [`ReactCodeAgent`] هو نوع جديد من ReactJsonAgent يقوم بتوليد استدعاءات أدواته كمقاطع من التعليمات البرمجية، والتي تعمل بشكل جيد حقًا مع LLMs التي تتمتع بأداء  قوي في البرمجة.
+
+> [!TIP]
+> اقرأ منشور المدونة [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) لمعرفة المزيد عن وكيل ReAct.
+
+![إطار عمل وكيل ReAct](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png)
+
+على سبيل المثال، إليك كيف يعمل وكيل ReAct Code طريقه من خلال السؤال التالي.
+
+```py3
+>>> agent.run(
+...     "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
+... )
+=====New task=====
+How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
+====Agent is executing the code below:
+bert_blocks = search(query="number of blocks in BERT base encoder")
+print("BERT blocks:", bert_blocks)
+====
+Print outputs:
+BERT blocks: twelve encoder blocks
+
+====Agent is executing the code below:
+attention_layer = search(query="number of layers in Attention is All You Need")
+print("Attention layers:", attention_layer)
+====
+Print outputs:
+Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture.
+
+====Agent is executing the code below:
+bert_blocks = 12
+attention_layers = 6
+diff = bert_blocks - attention_layers
+print("Difference in blocks:", diff)
+final_answer(diff)
+====
+
+Print outputs:
+Difference in blocks: 6
+
+Final answer: 6
+```
+
+### كيف يمكنني بناء وكيل؟
+
+لتهيئة وكيل، تحتاج إلى هذه الوسائط:
+
+- نموذج لغوي كبير (LLM) يشكل المحرك الأساسي للوكيل. الوكيل نفسه ليس النموذج اللغوي، بل هو برنامج يستخدم النموذج اللغوي كمحرك له.
+- موجه النظام (system prompt): هذه هي التعليمات التي يتم إعطاؤها للنموذج اللغوي لإنشاء مخرجاته.
+- صندوق أدوات (toolbox) يختار الوكيل منه الأدوات لتنفيذها
+- محلل (parser) لاستخراج الأدوات التي يجب استدعاؤها من مخرجات النموذج اللغوي LLM والأدوات التي يجب استخدامها
+
+عند تهيئة نظام الوكيل، يتم استخدام سمات الأداة لإنشاء وصف للأداة، ثم يتم دمجها في موجه النظام الخاص `system_prompt` للوكيل لإعلامه بالأدوات التي يمكنه استخدامها ولماذا.
+
+للبدء، يرجى تثبيت `agents` الإضافية لتثبيت جميع التبعيات الافتراضية.
+
+```bash
+pip install transformers[agents]
+```
+
+قم ببناء محرك LLM الخاص بك من خلال تعريف طريقة `llm_engine` التي تقبل قائمة من [الرسائل](./chat_templating) وتعيد النص. يجب أن تقبل هذه الدالة القابلة للاستدعاء أيضًا معامل `stop` يشير إلى متى يجب التوقف عن التوليد.
+
+```python
+from huggingface_hub import login, InferenceClient
+
+login("<YOUR_HUGGINGFACEHUB_API_TOKEN>")
+
+client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")
+
+def llm_engine(messages, stop_sequences=["Task"]) -> str:
+    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
+    answer = response.choices[0].message.content
+    return answer
+```
+
+يمكنك استخدام أي طريقة `llm_engine` طالما أنها:
+1. يتبع تنسيق [رسائل](./chat_templating.md) لإدخاله (`List[Dict[str, str]]`) ويعيد `str`
+2. يتوقف عن توليد المخرجات عند التسلسلات التي تم تمريرها في معامل `stop`
+
+أنت بحاجة أيضًا إلى معامل `tools` الذي يقبل قائمة من الأدوات (`Tool`)، ويمكن أن تكون هذه القائمة فارغة. يمكنك أيضًا إضافة صندوق الأدوات الافتراضي إلى قائمة `tools` باستخدام المعامل الاختياري `add_base_tools=True`.
+
+الآن يمكنك إنشاء وكيل، مثل [`CodeAgent`], وتشغيله. ولتسهيل الأمر، نقدم أيضًا فئة [`HfEngine`] التي تستخدم `huggingface_hub.InferenceClient` بشكل مخفى.
+
+```python
+from transformers import CodeAgent, HfEngine
+
+llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
+agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+
+agent.run(
+    "Could you translate this sentence from French, say it out loud and return the audio.",
+    sentence="Où est la boulangerie la plus proche?",
+)
+```
+
+هذه الميزة ستكون مفيدة في حالة الحاجة الملحة! يمكنك حتى ترك معامل `llm_engine` غير محدد، وسيتم إنشاء [`HfEngine`] بشكل تلقائي.
+
+```python
+from transformers import CodeAgent
+
+agent = CodeAgent(tools=[], add_base_tools=True)
+
+agent.run(
+    "Could you translate this sentence from French, say it out loud and give me the audio.",
+    sentence="Où est la boulangerie la plus proche?",
+)
+```
+
+لاحظ أننا استخدمنا معامل "sentence" إضافي: يمكنك تمرير النص كمعامل إضافي إلى النموذج.
+
+يمكنك أيضًا استخدام هذا للإشارة إلى مسار الملفات المحلية أو البعيدة للنموذج لاستخدامها:
+
+```py
+from transformers import ReactCodeAgent
+
+agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+
+agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
+```
+
+
+تم تحديد موجه النظام ومحلل المخرجات تلقائيًا، ولكن يمكنك فحصهما بسهولة عن طريق استدعاء `system_prompt_template` على وكيلك.
+
+```python
+print(agent.system_prompt_template)
+```
+
+من المهم أن تشرح بأكبر قدر ممكن من الوضوح المهمة التي تريد تنفيذها.
+كل عملية [`~Agent.run`] مستقلة، وبما أن الوكيل مدعوم من LLM، فقد تؤدي الاختلافات الطفيفة في موجهك إلى نتائج مختلفة تمامًا.
+يمكنك أيضًا تشغيل وكيل بشكل متتالي لمهام مختلفة: في كل مرة يتم فيها إعادة تهيئة سمتي `agent.task` و`agent.logs`.
+
+
+#### تنفيذ التعليمات البرمجية
+
+يقوم مفسر Python بتنفيذ التعليمات البرمجية على مجموعة من المدخلات التي يتم تمريرها جنبًا إلى جنب مع أدواتك.
+يجب أن يكون هذا الأمر آمنًا لأن الوظائف الوحيدة التي يمكن استدعاؤها هي الأدوات التي قدمتها (خاصة إذا كانت أدوات من Hugging Face فقط) ووظيفة الطباعة، لذا فأنت مقيد بالفعل بما يمكن تنفيذه.
+
+مفسر Python لا يسمح أيضًا باستدعاء دوال بشكل افتراضي خارج قائمة آمنة، لذا فإن جميع الهجمات الأكثر وضوحًا لا ينبغي أن تكون مشكلة.
+يمكنك أيضًا الإذن باستيرادات إضافية عن طريق تمرير الوحدات النمطية المصرح بها كقائمة من السلاسل في معامل  `additional_authorized_imports` عند تهيئة [`ReactCodeAgent`] أو [`CodeAgent`]:
+
+```py
+>>> from transformers import ReactCodeAgent
+
+>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
+>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
+
+(...)
+'Hugging Face – Blog'
+```
+
+سيتم إيقاف التنفيذ عند أي رمز يحاول تنفيذ عملية غير قانونية أو إذا كان هناك خطأ Python عادي في التعليمات البرمجية التي تم إنشاؤها بواسطة الوكيل.
+
+> [!WARNING]
+> يمكن لـ LLM توليد شفرة برمجية عشوائية سيتم تنفيذها بعد ذلك: لا تقم باستدعاء أي دوال غير آمنة!
+
+### موجه النظام
+
+يولّد الوكيل، أو بالأحرى LLM الذي يقود الوكيل، مخرجات بناءً على موجه النظام. يمكن تخصيص موجه النظام وتصميمه للمهام المقصودة. على سبيل المثال، تحقق من موجه النظام لـ [`ReactCodeAgent`] (الإصدار أدناه مبسط قليلاً).
+
+```text
+You will be given a task to solve as best you can.
+You have access to the following tools:
+<<tool_descriptions>>
+
+To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
+
+At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
+Then in the 'Code:' sequence, you shold write the code in simple Python. The code sequence must end with '/End code' sequence.
+During each intermediate step, you can use 'print()' to save whatever important information you will then need.
+These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
+
+In the end you have to return a final answer using the `final_answer` tool.
+
+Here are a few examples using notional tools:
+---
+{examples}
+
+Above example were using notional tools that might not exist for you. You only have acces to those tools:
+<<tool_names>>
+You also can perform computations in the python code you generate.
+
+Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```<end_code>' sequence. You MUST provide at least the 'Code:' sequence to move forward.
+
+Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
+Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
+
+Remember to make sure that variables you use are all defined.
+
+Now Begin!
+```
+
+يتضمن موجه النظام:
+- *مقدمة* تشرح كيف يجب أن يتصرف الوكيل والأدوات التي يجب عليه استخدامها.
+- وصف لجميع الأدوات التي يتم تحديدها بواسطة رمز `<<tool_descriptions>>` الذي يتم استبداله ديناميكيًا في وقت التشغيل بالأدوات التي يحددها المستخدم أو يختارها.
+    - يأتي وصف الأداة من سمات الأداة، `name`، و`description`، و`inputs` و`output_type`، وقالب `jinja2` بسيط يمكنك تحسينه.
+- شكل المخرج المتوقع.
+
+يمكنك تحسين موجه النظام، على سبيل المثال، عن طريق إضافة شرح لتنسيق المخرجات.
+
+للحصول على أقصى قدر من المرونة، يمكنك الكتابة فوق قالب موجه النظام بالكامل عن طريق تمرير موجه مخصص كمعامل إلى معلمة `system_prompt`.
+
+```python
+from transformers import ReactJsonAgent
+from transformers.agents import PythonInterpreterTool
+
+agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
+```
+
+> [!WARNING]
+> يرجى التأكد من تحديد سلسلة `<<tool_descriptions>>` في مكان ما في `template` حتى يكون الوكيل على علم 
+بالأدوات المتاحة.
+
+
+### فحص تشغيل الوكيل
+
+فيما يلي بعض السمات المفيدة لفحص ما حدث بعد التشغيل:
+- تخزن `agent.logs` سجلات مفصلة للوكيل. في كل خطوة من تشغيل الوكيل، يتم تخزين كل شيء في قاموس يتم إلحاقه بعد ذلك بـ `agent.logs`.
+- تشغيل `agent.write_inner_memory_from_logs()` يخلق ذاكرة داخلية لسجلات الوكيل للنظام LLM لعرضها، كقائمة من رسائل الدردشة. تنتقل هذه الطريقة عبر كل خطوة من سجل الوكيل ولا تخزن سوى ما يهمها كرسالة: على سبيل المثال، سيحفظ موجه النظام والمهمة في رسائل منفصلة، ثم لكل خطوة سيخزن مخرج LLM كرسالة، ومخرج استدعاء الأداة كرسالة أخرى. استخدم هذا إذا كنت تريد عرضًا عامًا لما حدث - ولكن لن يتم نسخ كل سجل بواسطة هذه الطريقة.
+
+## الأدوات
+
+الأداة هي عبارة عن وظيفة أساسية يستخدمها الوكيل لتنفيذ مهمة محددة.
+
+يمكنك على سبيل المثال التحقق من [`PythonInterpreterTool`]: لديه اسم ووصف ووصف للمدخلات ونوع للمخرج، وطريقة `__call__` التي تقوم بتنفيذ المهمة المطلوبة.
+
+عند تهيئة الوكيل، يتم استخدام سمات الأداة لتوليد وصف للأداة يتم تضمينه في موجه النظام الخاص بالوكيل. يتيح هذا للوكيل معرفة الأدوات التي يمكنه استخدامها ولماذا.
+
+### صندوق الأدوات الافتراضي
+
+يأتي Transformers مع صندوق أدوات افتراضي لتمكين الوكلاء، والذي يمكنك إضافته إلى وكيلك عند التهيئة باستخدام معامل `add_base_tools = True`:
+
+- **الإجابة على أسئلة المستند**: الإجابة على سؤال حول المستند (مثل ملف PDF) بتنسيق صورة ([Donut](./model_doc/donut))
+- **الإجابة على أسئلة الصور**: الإجابة على سؤال حول صورة ([VILT](./model_doc/vilt))
+- **تحويل الكلام إلى نص**: قم بتفريغ الكلام إلى نص ([Whisper](./model_doc/whisper))
+- **النص إلى كلام**: تحويل النص إلى كلام ([SpeechT5](./model_doc/speecht5))
+- **الترجمة**: ترجمة جملة معينة من لغة المصدر إلى لغة الهدف.
+- **مفسر كود Python**: تشغيل كود Python الذي تم إنشاؤه بواسطة LLM في بيئة آمنة. لن يتم إضافة هذه الأداة إلى [`ReactJsonAgent`] إلا إذا استخدمت `add_base_tools=True`، نظرًا لأن الوكلاء المستندين إلى التعليمات البرمجية يمكنهم بالفعل تنفيذ كود Python.
+
+يمكنك استخدام أداة يدويًا عن طريق استدعاء دالة [`load_tool`] وتحديد مهمة لتنفيذها.
+
+```python
+from transformers import load_tool
+
+tool = load_tool("text-to-speech")
+audio = tool("This is a text to speech tool")
+```
+
+### إنشاء أداة جديدة
+
+يمكنك إنشاء أداتك الخاصة لتغطية حالات الاستخدام التي لا تغطيها الأدوات الافتراضية من Hugging Face.
+على سبيل المثال، دعنا نقوم بإنشاء أداة تعرض النموذج الأكثر تنزيلًا لمهمة معينة من Hub.
+
+سوف نبدأ بالكود التالي.
+
+```python
+from huggingface_hub import list_models
+
+task = "text-classification"
+
+model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+print(model.id)
+```
+
+يمكن تحويل هذه الشيفرة إلى فئة ترث من الفئة العليا [`Tool`].
+
+تحتاج الأداة المخصصة إلى:
+
+- اسم `name`، والتي تمثل اسم الأداة نفسها. عادةً ما يصف الاسم وظيفتها. بما أن الكود يعيد النموذج الأكثر تنزيلًا لمهمة ما، فلنسمها `model_download_counter`.
+- تستخدم خاصية `description` لملء موجه نظام الوكيل.
+- خاصية `inputs`، والتي هي عبارة عن قاموس بمفاتيح "type" و"description". يحتوي على معلومات تساعد المفسر Python على اتخاذ خيارات مستنيرة بشأن المدخلات.
+- خاصية `output_type`، والتي تحدد نوع المخرج.
+- طريقة `forward` والتي تحتوي على الكود الذي سيتم تنفيذه للحصول على النتيجة النهائية.
+
+```python
+from transformers import Tool
+from huggingface_hub import list_models
+
+class HFModelDownloadsTool(Tool):
+    name = "model_download_counter"
+    description = (
+        "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. "
+        "It returns the name of the checkpoint."
+    )
+
+    inputs = {
+        "task": {
+            "type": "text",
+            "description": "the task category (such as text-classification, depth-estimation, etc)",
+        }
+    }
+    output_type = "text"
+
+    def forward(self, task: str):
+        model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+        return model.id
+```
+
+الآن بعد أن أصبحت فئة `HFModelDownloadsTool` المخصصة جاهزة، يمكنك حفظها في ملف باسم `model_downloads.py` واستيرادها للاستخدام.
+
+```python
+from model_downloads import HFModelDownloadsTool
+
+tool = HFModelDownloadsTool()
+```
+
+يمكنك أيضًا مشاركة أداتك المخصصة في Hub عن طريق استدعاء [`~Tool.push_to_hub`] على الأداة. تأكد من أنك قمت بإنشاء مستودع لها على Hub وأنك تستخدم رمز وصول للقراءة.
+
+```python
+tool.push_to_hub("{your_username}/hf-model-downloads")
+```
+
+قم بتحميل الأداة باستخدام دالة [`~Tool.load_tool`] ومررها إلى معلمة `tools` في الوكيل الخاص بك.
+
+```python
+from transformers import load_tool, CodeAgent
+
+model_download_tool = load_tool("m-ric/hf-model-downloads")
+agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine)
+agent.run(
+    "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?"
+)
+```
+
+ستحصل على ما يلي:
+
+```text
+======== New task ========
+Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?
+==== Agent is executing the code below:
+most_downloaded_model = model_download_counter(task="text-to-video")
+print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.")
+====
+```
+
+والناتج:
+
+`"النموذج الأكثر تنزيلًا لمهمة 'text-to-video' هو ByteDance/AnimateDiff-Lightning."`
+
+### إدارة صندوق أدوات الوكيل الخاص بك
+
+إذا كنت قد قمت بتهيئة وكيل، فمن غير الملائم إعادة تهيئته من البداية لإضافة أداة جديدة ترغب في استخدامها. باستخدام مكتبة Transformers، يمكنك إدارة صندوق أدوات الوكيل بإضافة أو استبدال أداة موجودة.
+
+دعنا نضيف الأداة `model_download_tool` إلى وكيل تم تهيئته مسبقًا باستخدام صندوق الأدوات الافتراضي.
+
+```python
+from transformers import CodeAgent
+
+agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+agent.toolbox.add_tool(model_download_tool)
+```
+
+الآن يمكننا الاستفادة من الأداة الجديدة وأداة تحويل النص إلى كلام السابقة:
+
+```python
+    agent.run(
+        "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?"
+    )
+```
+
+| **Audio**                                                                                                                                            |
+|------------------------------------------------------------------------------------------------------------------------------------------------------|
+| <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/damo.wav" type="audio/wav"/> |
+
+> [!WARNING]
+> احترس عند إضافة أدوات إلى وكيل يعمل بالفعل لأنه يمكن أن يؤثر على اختيار الأداة لصالح أداتك أو اختيار أداة أخرى غير المحددة بالفعل.
+
+استخدم طريقة `agent.toolbox.update_tool()` لاستبدال أداة موجودة في صندوق أدوات الوكيل.
+هذا مفيد إذا كانت أداتك الجديدة بديلاً مباشرًا للأداة الموجودة لأن الوكيل يعرف بالفعل كيفية تنفيذ تلك المهمة المحددة.
+تأكد فقط من اتباع الأداة الجديدة لنفس واجهة برمجة التطبيقات (API) للأداة المستبدلة أو قم بتكييف قالب موجه النظام لضمان تحديث جميع الأمثلة التي تستخدم الأداة المستبدلة.
+
+### استخدام مجموعة من الأدوات
+
+يمكنك الاستفادة من مجموعات الأدوات باستخدام كائن ToolCollection، مع تحديد مجموعة الأدوات التي تريد استخدامها.
+ثم قم بتمريرها كقائمة لتهيئة الوكيل الخاص بك، وبدء استخدامها!
+
+```py
+from transformers import ToolCollection, ReactCodeAgent
+
+image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
+agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
+
+agent.run("Please draw me a picture of rivers and lakes.")
+```
+
+لتسريع البداية، يتم تحميل الأدوات فقط إذا استدعاها الوكيل.
+
+ستحصل على هذه الصورة:
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png" />
+
+### استخدام gradio-tools
+
+[gradio-tools](https://github.com/freddyaboulton/gradio-tools) هي مكتبة قوية تتيح استخدام Hugging
+Face Spaces كأدوات. تدعم العديد من المساحات الموجودة بالإضافة إلى مساحات مخصصة.
+
+تدعم مكتبة Transformers `gradio_tools` باستخدام طريقة [`Tool.from_gradio`] في الفئة. على سبيل المثال، دعنا نستخدم [`StableDiffusionPromptGeneratorTool`](https://github.com/freddyaboulton/gradio-tools/blob/main/gradio_tools/tools/prompt_generator.py) من مجموعة أدوات `gradio-tools` لتحسين المطالبات لإنشاء صور أفضل.
+
+استورد وقم بتهيئة الأداة، ثم مررها إلى طريقة `Tool.from_gradio`:
+
+```python
+from gradio_tools import StableDiffusionPromptGeneratorTool
+from transformers import Tool, load_tool, CodeAgent
+
+gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool()
+prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool)
+```
+
+الآن يمكنك استخدامه مثل أي أداة أخرى. على سبيل المثال، دعنا نحسن الموجه `a rabbit wearing a space suit`.
+
+```python
+image_generation_tool = load_tool('huggingface-tools/text-to-image')
+agent = CodeAgent(tools=[prompt_generator_tool, image_generation_tool], llm_engine=llm_engine)
+
+agent.run(
+    "Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit'
+)
+```
+
+يستفيد النموذج بشكل كافٍ من الأداة:
+
+```text
+======== New task ========
+Improve this prompt, then generate an image of it.
+You have been provided with these initial arguments: {'prompt': 'A rabbit wearing a space suit'}.
+==== Agent is executing the code below:
+improved_prompt = StableDiffusionPromptGenerator(query=prompt)
+while improved_prompt == "QUEUE_FULL":
+    improved_prompt = StableDiffusionPromptGenerator(query=prompt)
+print(f"The improved prompt is {improved_prompt}.")
+image = image_generator(prompt=improved_prompt)
+====
+```
+
+قبل إنشاء الصورة أخيرًا:
+
+<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit_spacesuit_flux.webp" />
+
+> [!WARNING]
+> تتطلب gradio-tools إدخالات وإخراجات *نصية* حتى عند العمل مع طرائق مختلفة مثل كائنات الصور والصوت. الإدخالات والإخراجات الصورية والصوتية غير متوافقة حاليًا.
+
+### استخدام أدوات LangChain
+
+نحن نحب LangChain ونعتقد أنها تحتوي على مجموعة أدوات قوية للغاية.
+لاستيراد أداة من LangChain، استخدم الطريقة `from_langchain()`.
+
+فيما يلي كيفية استخدامها لإعادة إنشاء نتيجة البحث في المقدمة باستخدام أداة بحث الويب LangChain.
+
+```python
+from langchain.agents import load_tools
+from transformers import Tool, ReactCodeAgent
+
+search_tool = Tool.from_langchain(load_tools(["serpapi"])[0])
+
+agent = ReactCodeAgent(tools=[search_tool])
+
+agent.run("How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?")
+```
+
+## واجهة Gradio
+
+يمكنك الاستفادة من `gradio.Chatbot` لعرض أفكار الوكيل الخاص بك باستخدام `stream_to_gradio`، إليك مثال:
+
+```py
+import gradio as gr
+from transformers import (
+    load_tool,
+    ReactCodeAgent,
+    HfEngine,
+    stream_to_gradio,
+)
+
+# Import tool from Hub
+image_generation_tool = load_tool("m-ric/text-to-image")
+
+llm_engine = HfEngine("meta-llama/Meta-Llama-3-70B-Instruct")
+
+# Initialize the agent with the image generation tool
+agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
+
+
+def interact_with_agent(task):
+    messages = []
+    messages.append(gr.ChatMessage(role="user", content=task))
+    yield messages
+    for msg in stream_to_gradio(agent, task):
+        messages.append(msg)
+        yield messages + [
+            gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
+        ]
+    yield messages
+
+
+with gr.Blocks() as demo:
+    text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
+    submit = gr.Button("Run illustrator agent!")
+    chatbot = gr.Chatbot(
+        label="Agent",
+        type="messages",
+        avatar_images=(
+            None,
+            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
+        ),
+    )
+    submit.click(interact_with_agent, [text_input], [chatbot])
+
+if __name__ == "__main__":
+    demo.launch()
+```
--- a/docs/source/ar/bertology.md
+++ b/docs/source/ar/bertology.md
@ -3,16 +3,16 @@
 يُشهد في الآونة الأخيرة نمو مجال دراسي يُعنى باستكشاف آلية عمل نماذج المحولات الضخمة مثل BERT (والذي يُطلق عليها البعض اسم "BERTology"). ومن الأمثلة البارزة على هذا المجال ما يلي:

 - BERT Rediscovers the Classical NLP Pipeline بواسطة Ian Tenney و Dipanjan Das و Ellie Pavlick:
-  https://huggingface.co/papers/1905.05950
- Are Sixteen Heads Really Better than One? بواسطة Paul Michel و Omer Levy و Graham Neubig: https://huggingface.co/papers/1905.10650
+  https://arxiv.org/abs/1905.05950
+- Are Sixteen Heads Really Better than One? بواسطة Paul Michel و Omer Levy و Graham Neubig: https://arxiv.org/abs/1905.10650
 - What Does BERT Look At? An Analysis of BERT's Attention بواسطة Kevin Clark و Urvashi Khandelwal و Omer Levy و Christopher D.
-  Manning: https://huggingface.co/papers/1906.04341
- CAT-probing: A Metric-based Approach to Interpret How Pre-trained Models for Programming Language Attend Code Structure: https://huggingface.co/papers/2210.04633
+  Manning: https://arxiv.org/abs/1906.04341
+- CAT-probing: A Metric-based Approach to Interpret How Pre-trained Models for Programming Language Attend Code Structure: https://arxiv.org/abs/2210.04633

-لإثراء هذا المجال الناشئ، قمنا بتضمين بعض الميزات الإضافية في نماذج BERT/GPT/GPT-2 للسماح للناس بالوصول إلى التمثيلات الداخلية، والتي تم تكييفها بشكل أساسي من العمل الرائد لـ Paul Michel (https://huggingface.co/papers/1905.10650):
+لإثراء هذا المجال الناشئ، قمنا بتضمين بعض الميزات الإضافية في نماذج BERT/GPT/GPT-2 للسماح للناس بالوصول إلى التمثيلات الداخلية، والتي تم تكييفها بشكل أساسي من العمل الرائد لـ Paul Michel (https://arxiv.org/abs/1905.10650):

 - الوصول إلى جميع الحالات المخفية في BERT/GPT/GPT-2،
 - الوصول إلى جميع أوزان الانتباه لكل رأس في BERT/GPT/GPT-2،
- استرجاع قيم ومشتقات  مخرجات الرأس لحساب درجة أهمية الرأس وحذفه كما هو موضح في https://huggingface.co/papers/1905.10650.
+- استرجاع قيم ومشتقات  مخرجات الرأس لحساب درجة أهمية الرأس وحذفه كما هو موضح في https://arxiv.org/abs/1905.10650.

-ولمساعدتك على فهم واستخدام هذه الميزات بسهولة، أضفنا مثالًا برمجيًا محددًا: [bertology.py](https://github.com/huggingface/transformers-research-projects/tree/main/bertology/run_bertology.py) أثناء استخراج المعلومات  وتقليص من نموذج تم تدريبه مسبقًا على GLUE.
+ولمساعدتك على فهم واستخدام هذه الميزات بسهولة، أضفنا مثالًا برمجيًا محددًا: [bertology.py](https://github.com/huggingface/transformers/tree/main/examples/research_projects/bertology/run_bertology.py) أثناء استخراج المعلومات  وتقليص من نموذج تم تدريبه مسبقًا على GLUE.
--- a/docs/source/ar/custom_models.md
+++ b/docs/source/ar/custom_models.md
@ -30,7 +30,7 @@ class ResnetConfig(PretrainedConfig):
    def __init__(
        self,
        block_type="bottleneck",
-        layers: list[int] = [3, 4, 6, 3],
+        layers: List[int] = [3, 4, 6, 3],
        num_classes: int = 1000,
        input_channels: int = 3,
        cardinality: int = 1,
@ -280,7 +280,7 @@ resnet50d.model.load_state_dict(pretrained_model.state_dict())
 الآن لإرسال النموذج إلى Hub، تأكد من تسجيل الدخول. إما تشغيل في المحطة الأوامر الطرفية الخاصة بك:

 ```bash
-hf auth login
+huggingface-cli login
 ```

 أو من دفتر ملاحظات:
--- a/docs/source/ar/gguf.md
+++ b/docs/source/ar/gguf.md
@ -77,7 +77,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

 الآن لديك إمكانية الوصول إلى النسخة الكامل غير المكممة للنموذج في بيئة PyTorch، حيث يمكنك دمجه مع مجموعة كبيرة من الأدوات الأخرى.

-لإعادة التحويل إلى ملف `gguf`، نوصي باستخدام ملف [`convert-hf-to-gguf.py`](https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py) من llama.cpp.
+لإعادة التحويل إلى ملف `gguf`، نوصي باستخدام ملف [`convert-hf-to-gguf.py`](https://github.com/ggerganov/llama.cpp/blob/master/convert-hf-to-gguf.py) من llama.cpp.

 فيما يلي كيفية إكمال البرنامج النصي أعلاه لحفظ النموذج وإعادة تصديره مرة أخرى إلى `gguf`:

--- a/docs/source/ar/glossary.md
+++ b/docs/source/ar/glossary.md
@ -135,7 +135,7 @@
 في كل وحدة الانتباه الباقية في المحولات، تلي طبقة الاهتمام الانتباه عادة طبقتان للتغذية الأمامية.
 حجم تضمين الطبقة الأمامية الوسيطة أكبر عادة من حجم المخفي للنموذج (على سبيل المثال، لـ
 `google-bert/bert-base-uncased`).
-بالنسبة لإدخال بحجم `[batch_size, sequence_length]`، يمكن أن تمثل الذاكرة المطلوبة لتخزين التضمينات الأمامية الوسيطة `[batch_size، sequence_length, config.intermediate_size]` جزءًا كبيرًا من استخدام الذاكرة. لاحظ مؤلفو (https://huggingface.co/papers/2001.04451)[Reformer: The Efficient Transformer] أنه نظرًا لأن الحساب مستقل عن بعد `sequence_length`، فإنه من المكافئ رياضيًا حساب تضمينات الإخراج الأمامية `[batch_size، config.hidden_size]_0, ..., [batch_size، `config_size]_n
+بالنسبة لإدخال بحجم `[batch_size, sequence_length]`، يمكن أن تمثل الذاكرة المطلوبة لتخزين التضمينات الأمامية الوسيطة `[batch_size, sequence_length, config.intermediate_size]` جزءًا كبيرًا من استخدام الذاكرة. لاحظ مؤلفو [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) أنه نظرًا لأن الحساب مستقل عن بعد `sequence_length`، فإنه من المكافئ رياضيًا حساب تضمينات الإخراج الأمامية `[batch_size, config.hidden_size]_0, ..., [batch_size, config.hidden_size]_n`
 فردياً والتوصيل بها لاحقًا إلى `[batch_size, sequence_length, config.hidden_size]` مع `n = sequence_length`، والذي يتداول زيادة وقت الحساب مقابل تقليل استخدام الذاكرة، ولكنه ينتج عنه نتيجة مكافئة رياضيا.

 بالنسبة للنماذج التي تستخدم الدالة `[apply_chunking_to_forward]`، يحدد `chunk_size` عدد التضمينات يتم حساب الإخراج بالتوازي وبالتالي يحدد المقايضة بين حجم الذاكرة والتعقيد الوقت. إذا تم تعيين `chunk_size` إلى `0`، فلن يتم إجراء تجزئة التغذية الأمامية.
@ -173,7 +173,7 @@

 <Youtube id="VFp38yj8h3A"/>

-يعمل كل محلل لغوي بشكل مختلف ولكن الآلية الأساسية تبقى كما هي. إليك مثال باستخدام محلل BERT اللغوي، والذي يعد محلل لغوي [WordPiece](https://huggingface.co/papers/1609.08144):
+يعمل كل محلل لغوي بشكل مختلف ولكن الآلية الأساسية تبقى كما هي. إليك مثال باستخدام محلل BERT اللغوي، والذي يعد محلل لغوي [WordPiece](https://arxiv.org/pdf/1609.08144.pdf):

 ```python
 >>> from transformers import BertTokenizer
--- a/docs/source/ar/llm_tutorial_optimization.md
+++ b/docs/source/ar/llm_tutorial_optimization.md
@ -6,18 +6,18 @@
 تحقق نماذج اللغة الكبيرة (LLMs) مثل GPT3/4، [Falcon](https://huggingface.co/tiiuae/falcon-40b)، و [Llama](https://huggingface.co/meta-llama/Llama-2-70b-hf) تقدمًا سريعًا في قدرتها على معالجة المهام التي تركز على الإنسان، مما يجعلها أدوات أساسية في الصناعات القائمة على المعرفة الحديثة.
 لا يزال نشر هذه النماذج في المهام الواقعية يمثل تحديًا، ومع ذلك:

-   لكي تظهر نماذج اللغة الكبيرة قدرات فهم وتوليد النصوص قريبة من قدرات الإنسان، فإنها تتطلب حاليًا  إلى تكوينها من مليارات المعلمات (انظر [كابلان وآخرون](https://huggingface.co/papers/2001.08361)، [وي وآخرون](https://huggingface.co/papers/2206.07682)). وهذا بدوره يزيد من متطلبات الذاكرة للاستدلال.
+-   لكي تظهر نماذج اللغة الكبيرة قدرات فهم وتوليد النصوص قريبة من قدرات الإنسان، فإنها تتطلب حاليًا  إلى تكوينها من مليارات المعلمات (انظر [كابلان وآخرون](https://arxiv.org/abs/2001.08361)، [وي وآخرون](https://arxiv.org/abs/2206.07682)). وهذا بدوره يزيد من متطلبات الذاكرة للاستدلال.
 -   في العديد من المهام الواقعية، تحتاج نماذج اللغة الكبيرة إلى معلومات سياقية شاملة. يتطلب ذلك قدرة النموذج على إدارة تسلسلات إدخال طويلة للغاية أثناء الاستدلال.

 يكمن جوهر صعوبة هذه التحديات في تعزيز القدرات الحسابية والذاكرة لنماذج اللغة الكبيرة، خاصة عند التعامل مع تسلسلات الإدخال الضخمة.

 في هذا الدليل، سنستعرض التقنيات الفعالة لتُحسِّن من كفاءة نشر نماذج اللغة الكبيرة:

-1. سنتناول تقنية "دقة أقل" التي أثبتت الأبحاث فعاليتها في تحقيق مزايا حسابية دون التأثير بشكل ملحوظ على أداء النموذج عن طريق العمل بدقة رقمية أقل [8 بت و4 بت](/main_classes/quantization).
+1. سنتناول تقنية "دقة أقل" التي أثبتت الأبحاث فعاليتها في تحقيق مزايا حسابية دون التأثير بشكل ملحوظ على أداء النموذج عن طريق العمل بدقة رقمية أقل [8 بت و4 بت](/main_classes/quantization.md).

 2.  **Flash Attention:** إن Flash Attention هي نسخة مُعدَّلة من خوارزمية الانتباه لا توفر فقط نهجًا أكثر كفاءة في استخدام الذاكرة، ولكنها تحقق أيضًا كفاءة متزايدة بسبب الاستخدام الأمثل لذاكرة GPU.

-3.  **الابتكارات المعمارية:** حيث تم اقتراح هياكل متخصصة تسمح باستدلال أكثر فعالية نظرًا لأن نماذج اللغة الكبيرة يتم نشرها دائمًا بنفس الطريقة أثناء عملية الاستدلال، أي توليد النص التنبؤي التلقائي مع سياق الإدخال الطويل، فقد تم اقتراح بنيات نموذج متخصصة تسمح بالاستدلال الأكثر كفاءة. أهم تقدم في بنيات النماذج هنا هو [عذر](https://huggingface.co/papers/2108.12409)، [الترميز الدوار](https://huggingface.co/papers/2104.09864)، [الاهتمام متعدد الاستعلامات (MQA)](https://huggingface.co/papers/1911.02150) و [مجموعة الانتباه بالاستعلام (GQA)](https://huggingface.co/papers/2305.13245).
+3.  **الابتكارات المعمارية:** نظرًا لأن نماذج اللغة الكبيرة يتم نشرها دائمًا بنفس الطريقة أثناء عملية الاستدلال، أي توليد النص التنبؤي التلقائي مع سياق الإدخال الطويل، فقد تم اقتراح بنيات نموذج متخصصة تسمح بالاستدلال الأكثر كفاءة. أهم تقدم في بنيات النماذج هنا هو [Alibi](https://arxiv.org/abs/2108.12409)، [الترميز الدوار](https://arxiv.org/abs/2104.09864)، [الاهتمام متعدد الاستعلامات (MQA)](https://arxiv.org/abs/1911.02150) و [مجموعة الانتباه بالاستعلام (GQA)](https://arxiv.org/abs/2305.13245).

 على مدار هذا الدليل، سنقدم تحليلًا للتوليد التنبؤي التلقائي من منظور المُوتِّرات. نتعمق في مزايا وعيوب استخدام دقة أقل، ونقدم استكشافًا شاملاً لخوارزميات الانتباه الأحدث، ونناقش بنيات نماذج نماذج اللغة الكبيرة المحسنة. سندعم الشرح بأمثلة عملية تُبرِز كل تحسين على حدة.

@ -152,8 +152,8 @@ from accelerate.utils import release_memory
 release_memory(model)
 ```

-والآن ماذا لو لم يكن لدى وحدة معالجة الرسومات (GPU) لديك 32 جيجا بايت من ذاكرة الفيديو العشوائية (VRAM)؟ لقد وجد أن أوزان النماذج يمكن تحويلها إلى 8 بتات أو 4 بتات دون خسارة كبيرة في الأداء (انظر [Dettmers et al.](https://huggingface.co/papers/2208.07339)).
-يمكن تحويل النموذج إلى 3 بتات أو 2 بتات مع فقدان مقبول في الأداء كما هو موضح في ورقة [GPTQ](https://huggingface.co/papers/2210.17323) 🤯.
+والآن ماذا لو لم يكن لدى وحدة معالجة الرسومات (GPU) لديك 32 جيجا بايت من ذاكرة الفيديو العشوائية (VRAM)؟ لقد وجد أن أوزان النماذج يمكن تحويلها إلى 8 بتات أو 4 بتات دون خسارة كبيرة في الأداء (انظر [Dettmers et al.](https://arxiv.org/abs/2208.07339)).
+يمكن تحويل النموذج إلى 3 بتات أو 2 بتات مع فقدان مقبول في الأداء كما هو موضح في ورقة [GPTQ](https://arxiv.org/abs/2210.17323) 🤯.

 دون الدخول في الكثير من التفاصيل، تهدف مخططات التكميم إلى تخفيض دقة الأوزان مع محاولة الحفاظ على دقة نتائج النموذج كما هي (*أي* أقرب ما يمكن إلى bfloat16).
 لاحظ أن التكميم يعمل بشكل خاص جيدًا لتوليد النص حيث كل ما نهتم به هو اختيار *مجموعة الرموز الأكثر احتمالًا التالية* ولا نهتم حقًا بالقيم الدقيقة لتوزيع الرمز التالي *logit*.
@ -231,7 +231,7 @@ flush()
 دعنا نرى ما هو استهلاك ذاكرة GPU الذروة الذي يوفره تكميم 4 بت. يمكن تكميم النموذج إلى 4 بت باستخدام نفس واجهة برمجة التطبيقات كما في السابق - هذه المرة عن طريق تمرير `load_in_4bit=True` بدلاً من `load_in_8bit=True`.

 ```python
-model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", load_in_4bit=True, pad_token_id=0)
+model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", load_in_4bit=True, low_cpu_mem_usage=True, pad_token_id=0)

 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

@ -304,7 +304,7 @@ $$ \textbf{O} = \text{Attn}(\mathbf{X}) = \mathbf{V} \times \text{Softmax}(\math

 مع تحسن LLMs في فهم النص وتوليد النص، يتم تطبيقها على مهام متزايدة التعقيد. في حين أن النماذج كانت تتعامل سابقًا مع ترجمة أو تلخيص بضع جمل، فإنها الآن تدير صفحات كاملة، مما يتطلب القدرة على معالجة أطوال إدخال واسعة.

-كيف يمكننا التخلص من متطلبات الذاكرة الباهظة للتطويلات المدخلة الكبيرة؟ نحن بحاجة إلى طريقة جديدة لحساب آلية الاهتمام الذاتي التي تتخلص من مصفوفة \\( QK^T \\). [طريقه داو وآخرون.](https://huggingface.co/papers/2205.14135) طوروا بالضبط مثل هذا الخوارزمية الجديدة وأطلقوا عليها اسم **Flash Attention**.
+كيف يمكننا التخلص من متطلبات الذاكرة الباهظة لأطوال الإدخال الكبيرة؟ نحن بحاجة إلى طريقة جديدة لحساب آلية الاهتمام الذاتي التي تتخلص من مصفوفة \\( QK^T \\). [داو وآخرون](https://arxiv.org/abs/2205.14135) طوروا بالضبط مثل هذه الخوارزمية الجديدة وأطلقوا عليها اسم **Flash Attention**.

 باختصار، يكسر الاهتمام الفلاشي حساب \\( \mathbf{V} \times \operatorname{Softmax}(\mathbf{QK}^T\\)) ويحسب بدلاً من ذلك قطعًا أصغر من الإخراج عن طريق التكرار عبر العديد من خطوات حساب Softmax:

@ -318,7 +318,7 @@ $$ \textbf{O}_i \leftarrow s^a_{ij} * \textbf{O}_i + s^b_{ij} * \mathbf{V}_{j} \

 > من خلال تتبع إحصائيات التطبيع softmax واستخدام بعض الرياضيات الذكية، يعطي Flash Attention **مخرجات متطابقة رقميًا** مقارنة بطبقة الاهتمام الذاتي الافتراضية بتكلفة ذاكرة لا تزيد خطيًا مع \\( N \\).

-عند النظر إلى الصيغة، قد يقول المرء بديهيًا أن الاهتمام الفلاشي يجب أن يكون أبطأ بكثير مقارنة بصيغة الاهتمام الافتراضية حيث يلزم إجراء المزيد من الحسابات. في الواقع، يتطلب Flash Attention المزيد من عمليات الفاصلة العائمة مقارنة بالاهتمام العادي حيث يجب إعادة حساب إحصائيات التطبيع softmax باستمرار (راجع [الورقة](https://huggingface.co/papers/2205.14135) لمزيد من التفاصيل إذا كنت مهتمًا)
+عند النظر إلى الصيغة، قد يقول المرء بديهيًا أن الاهتمام الفلاشي يجب أن يكون أبطأ بكثير مقارنة بصيغة الاهتمام الافتراضية حيث يلزم إجراء المزيد من الحسابات. في الواقع، يتطلب Flash Attention المزيد من عمليات الفاصلة العائمة مقارنة بالاهتمام العادي حيث يجب إعادة حساب إحصائيات التطبيع softmax باستمرار (راجع [الورقة](https://arxiv.org/abs/2205.14135) لمزيد من التفاصيل إذا كنت مهتمًا)

 > ومع ذلك، فإن الاهتمام الفلاشي أسرع بكثير في الاستدلال مقارنة بالاهتمام الافتراضي الذي يأتي من قدرته على تقليل الطلبات على ذاكرة GPU الأبطأ ذات النطاق الترددي العالي (VRAM)، والتركيز بدلاً من ذلك على ذاكرة SRAM الأسرع الموجودة على الشريحة.

@ -535,20 +535,20 @@ flush()
 لكي يفهم LLM ترتيب الجملة، يلزم وجود *إشارة* إضافية ويتم تطبيقها عادةً في شكل *الترميزات الموضعية* (أو ما يُطلق عليه أيضًا *التضمينات الموضعية*).
 لم يتم ترجمة النص الخاص والروابط وأكواد HTML وCSS بناءً على طلبك.

-قدم مؤلفو الورقة البحثية [*Attention Is All You Need*](https://huggingface.co/papers/1706.03762) تضمينات موضعية جيبية مثلثية \\( \mathbf{P} = \mathbf{p}_1, \ldots, \mathbf{p}_N \\) حيث يتم حساب كل متجه \\( \mathbf{p}_i \\) كدالة جيبية لموضعه \\( i \\) .
+قدم مؤلفو الورقة البحثية [*Attention Is All You Need*](https://arxiv.org/abs/1706.03762) تضمينات موضعية جيبية مثلثية \\( \mathbf{P} = \mathbf{p}_1, \ldots, \mathbf{p}_N \\) حيث يتم حساب كل متجه \\( \mathbf{p}_i \\) كدالة جيبية لموضعه \\( i \\) .
 بعد ذلك يتم ببساطة إضافة التضمينات الموضعية إلى متجهات تسلسل الإدخال \\( \mathbf{\hat{X}} = \mathbf{\hat{x}}_1, \ldots, \mathbf{\hat{x}}_N \\) = \\( \mathbf{x}_1 + \mathbf{p}_1, \ldots, \mathbf{x}_N + \mathbf{p}_N \\) وبالتالي توجيه النموذج لتعلم ترتيب الجملة بشكل أفضل.

-بدلاً من استخدام التضمينات الموضعية الثابتة، استخدم آخرون (مثل [Devlin et al.](https://huggingface.co/papers/1810.04805)) تضمينات موضعية مكتسبة يتم من خلالها تعلم التضمينات الموضعية \\( \mathbf{P} \\) أثناء التدريب.
+بدلاً من استخدام التضمينات الموضعية الثابتة، استخدم آخرون (مثل [Devlin et al.](https://arxiv.org/abs/1810.04805)) تضمينات موضعية مكتسبة يتم من خلالها تعلم التضمينات الموضعية \\( \mathbf{P} \\) أثناء التدريب.

 كانت التضمينات الموضعية الجيبية والمكتسبة هي الطرق السائدة لترميز ترتيب الجملة في نماذج اللغة الكبيرة، ولكن تم العثور على بعض المشكلات المتعلقة بهذه التضمينات الموضعية:

-1. التضمينات الموضعية الجيبية والمكتسبة هي تضمينات موضعية مطلقة، أي ترميز تضمين فريد لكل معرف موضعي: \\( 0, \ldots, N \\) . كما أظهر [Huang et al.](https://huggingface.co/papers/2009.13658) و [Su et al.](https://huggingface.co/papers/2104.09864)، تؤدي التضمينات الموضعية المطلقة إلى أداء ضعيف لنماذج اللغة الكبيرة للمدخلات النصية الطويلة. بالنسبة للمدخلات النصية الطويلة، يكون من المفيد إذا تعلم النموذج المسافة الموضعية النسبية التي تمتلكها رموز المدخلات إلى بعضها البعض بدلاً من موضعها المطلق.
+1. التضمينات الموضعية الجيبية والمكتسبة هي تضمينات موضعية مطلقة، أي ترميز تضمين فريد لكل معرف موضعي: \\( 0, \ldots, N \\) . كما أظهر [Huang et al.](https://arxiv.org/abs/2009.13658) و [Su et al.](https://arxiv.org/abs/2104.09864)، تؤدي التضمينات الموضعية المطلقة إلى أداء ضعيف لنماذج اللغة الكبيرة للمدخلات النصية الطويلة. بالنسبة للمدخلات النصية الطويلة، يكون من المفيد إذا تعلم النموذج المسافة الموضعية النسبية التي تمتلكها رموز المدخلات إلى بعضها البعض بدلاً من موضعها المطلق.
 2. عند استخدام التضمينات الموضعية المكتسبة، يجب تدريب نموذج اللغة الكبيرة على طول إدخال ثابت \\( N \\)، مما يجعل من الصعب الاستقراء إلى طول إدخال أطول مما تم تدريبه عليه.

 في الآونة الأخيرة، أصبحت التضمينات الموضعية النسبية التي يمكنها معالجة المشكلات المذكورة أعلاه أكثر شعبية، وأبرزها:

-   [تضمين الموضع الدوراني (RoPE)](https://huggingface.co/papers/2104.09864)
-   [ALiBi](https://huggingface.co/papers/2108.12409)
+-   [تضمين الموضع الدوراني (RoPE)](https://arxiv.org/abs/2104.09864)
+-   [ALiBi](https://arxiv.org/abs/2108.12409)

 يؤكد كل من *RoPE* و *ALiBi* أنه من الأفضل توجيه نموذج اللغة الكبيرة حول ترتيب الجملة مباشرة في خوارزمية الانتباه الذاتي حيث يتم وضع رموز الكلمات في علاقة مع بعضها البعض. على وجه التحديد، يجب توجيه ترتيب الجملة عن طريق تعديل عملية \\( \mathbf{QK}^T \\) .

@ -563,14 +563,14 @@ $$ \mathbf{\hat{q}}_i^T \mathbf{\hat{x}}_j = \mathbf{{q}}_i^T \mathbf{R}_{\theta
 يستخدم *RoPE* في العديد من نماذج اللغة الكبيرة الأكثر أهمية اليوم، مثل:

 -   [**Falcon**](https://huggingface.co/tiiuae/falcon-40b)
-   [**Llama**](https://huggingface.co/papers/2302.13971)
-   [**PaLM**](https://huggingface.co/papers/2204.02311)
+-   [**Llama**](https://arxiv.org/abs/2302.13971)
+-   [**PaLM**](https://arxiv.org/abs/2204.02311)

 كبديل، يقترح *ALiBi* مخطط ترميز موضعي نسبي أبسط بكثير. يتم إضافة المسافة النسبية التي تمتلكها رموز المدخلات إلى بعضها البعض كعدد صحيح سلبي مقياس بقيمة محددة مسبقًا `m` إلى كل إدخال استعلام-مفتاح لمصفوفة \\( \mathbf{QK}^T \\) مباشرة قبل حساب softmax.

 ![](/blog/assets/163_optimize_llm/alibi.png)

-كما هو موضح في ورقة [ALiBi](https://huggingface.co/papers/2108.12409)، يسمح هذا الترميز الموضعي النسبي البسيط للنموذج بالحفاظ على أداء عالٍ حتى في تسلسلات المدخلات النصية الطويلة جدًا.
+كما هو موضح في ورقة [ALiBi](https://arxiv.org/abs/2108.12409)، يسمح هذا الترميز الموضعي النسبي البسيط للنموذج بالحفاظ على أداء عالٍ حتى في تسلسلات المدخلات النصية الطويلة جدًا.

 يُستخدم *ALiBi* في العديد من أهم نماذج اللغة الكبيرة المستخدمة اليوم، مثل:

@ -579,7 +579,7 @@ $$ \mathbf{\hat{q}}_i^T \mathbf{\hat{x}}_j = \mathbf{{q}}_i^T \mathbf{R}_{\theta

 يمكن لكل من ترميزات الموضع *RoPE* و *ALiBi* الاستقراء إلى أطوال إدخال لم يتم ملاحظتها أثناء التدريب، في حين ثبت أن الاستقراء يعمل بشكل أفضل بكثير خارج الصندوق لـ *ALiBi* مقارنة بـ *RoPE*.
 بالنسبة لـ ALiBi، ما عليك سوى زيادة قيم مصفوفة الموضع المثلث السفلي لمطابقة طول تسلسل الإدخال.
-بالنسبة لـ *RoPE*، يؤدي الحفاظ على نفس \\( \theta \\) الذي تم استخدامه أثناء التدريب إلى نتائج سيئة عند تمرير إدخالات نصية أطول بكثير من تلك التي شوهدت أثناء التدريب، راجع [Press et al.](https://huggingface.co/papers/2108.12409). ومع ذلك، وجد المجتمع بعض الحيل الفعالة التي تقوم بتعديل \\( \theta \\)، مما يسمح لترميزات الموضع *RoPE* بالعمل بشكل جيد لتسلسلات إدخال النص المستقرئة (راجع [هنا](https://github.com/huggingface/transformers/pull/24653)).
+بالنسبة لـ *RoPE*، يؤدي الحفاظ على نفس \\( \theta \\) الذي تم استخدامه أثناء التدريب إلى نتائج سيئة عند تمرير إدخالات نصية أطول بكثير من تلك التي شوهدت أثناء التدريب، راجع [Press et al.](https://arxiv.org/abs/2108.12409). ومع ذلك، وجد المجتمع بعض الحيل الفعالة التي تقوم بتعديل \\( \theta \\)، مما يسمح لترميزات الموضع *RoPE* بالعمل بشكل جيد لتسلسلات إدخال النص المستقرئة (راجع [هنا](https://github.com/huggingface/transformers/pull/24653)).

 > كل من RoPE و ALiBi عبارة عن ترميزات موضع نسبي *لا* يتم تعلمها أثناء التدريب، ولكن بدلاً من ذلك تستند إلى الحدس التالي:
 -   يجب إعطاء الإشارات الموضعية حول إدخالات النص مباشرة إلى مصفوفة \\( QK^T \\) لطبقة الاهتمام الذاتي
@ -755,21 +755,21 @@ Roughly 8 مليار قيمة عائمة! يتطلب تخزين 8 مليارات

 #### 3.2.2 Multi-Query-Attention (MQA)

-[Multi-Query-Attention](https://huggingface.co/papers/1911.02150) اقترحها Noam Shazeer في ورقته *Fast Transformer Decoding: One Write-Head is All You Need*. كما يقول العنوان، اكتشف Noam أنه بدلاً من استخدام `n_head` من أوزان إسقاط القيمة الرئيسية، يمكن استخدام زوج واحد من أوزان إسقاط رأس القيمة التي يتم مشاركتها عبر جميع رؤوس الاهتمام دون أن يتدهور أداء النموذج بشكل كبير.
+[Multi-Query-Attention](https://arxiv.org/abs/1911.02150) اقترحها Noam Shazeer في ورقته *Fast Transformer Decoding: One Write-Head is All You Need*. كما يقول العنوان، اكتشف Noam أنه بدلاً من استخدام `n_head` من أوزان إسقاط القيمة الرئيسية، يمكن استخدام زوج واحد من أوزان إسقاط رأس القيمة التي يتم مشاركتها عبر جميع رؤوس الاهتمام دون أن يتدهور أداء النموذج بشكل كبير.

 > باستخدام زوج واحد من أوزان إسقاط رأس القيمة، يجب أن تكون متجهات القيمة الرئيسية \\( \mathbf{k}_i, \mathbf{v}_i \\) متطابقة عبر جميع رؤوس الاهتمام والتي بدورها تعني أننا بحاجة فقط إلى تخزين زوج إسقاط قيمة رئيسي واحد في ذاكرة التخزين المؤقت بدلاً من `n_head` منها.

 نظرًا لأن معظم LLMs تستخدم ما بين 20 و100 رأس اهتمام، فإن MQA يقلل بشكل كبير من استهلاك الذاكرة لذاكرة التخزين المؤقت key-value. بالنسبة إلى LLM المستخدم في هذا الدفتر، يمكننا تقليل استهلاك الذاكرة المطلوبة من 15 جيجابايت إلى أقل من 400 ميجابايت عند طول تسلسل الإدخال 16000.

 بالإضافة إلى توفير الذاكرة، يؤدي MQA أيضًا إلى تحسين الكفاءة الحسابية كما هو موضح في ما يلي.
-في فك التشفير التلقائي، يجب إعادة تحميل متجهات القيمة الرئيسية الكبيرة، ودمجها مع زوج متجه القيمة الحالي، ثم إدخالها في \\( \mathbf{q}_c\mathbf{K}^T \\) الحساب في كل خطوة. بالنسبة لفك التشفير التلقائي، يمكن أن تصبح عرض النطاق الترددي للذاكرة المطلوبة لإعادة التحميل المستمر عنق زجاجة زمنيًا خطيرًا. من خلال تقليل حجم متجهات القيمة الرئيسية، يجب الوصول إلى ذاكرة أقل، وبالتالي تقليل عنق الزجاجة في عرض النطاق الترددي للذاكرة. لمزيد من التفاصيل، يرجى إلقاء نظرة على [ورقة Noam](https://huggingface.co/papers/1911.02150).
+في فك التشفير التلقائي، يجب إعادة تحميل متجهات القيمة الرئيسية الكبيرة، ودمجها مع زوج متجه القيمة الحالي، ثم إدخالها في \\( \mathbf{q}_c\mathbf{K}^T \\) الحساب في كل خطوة. بالنسبة لفك التشفير التلقائي، يمكن أن تصبح عرض النطاق الترددي للذاكرة المطلوبة لإعادة التحميل المستمر عنق زجاجة زمنيًا خطيرًا. من خلال تقليل حجم متجهات القيمة الرئيسية، يجب الوصول إلى ذاكرة أقل، وبالتالي تقليل عنق الزجاجة في عرض النطاق الترددي للذاكرة. لمزيد من التفاصيل، يرجى إلقاء نظرة على [ورقة Noam](https://arxiv.org/abs/1911.02150).

 الجزء المهم الذي يجب فهمه هنا هو أن تقليل عدد رؤوس الاهتمام بالقيمة الرئيسية إلى 1 لا معنى له إلا إذا تم استخدام ذاكرة التخزين المؤقت للقيمة الرئيسية. يظل الاستهلاك الذروي لذاكرة النموذج لمرور واحد للأمام بدون ذاكرة التخزين المؤقت للقيمة الرئيسية دون تغيير لأن كل رأس اهتمام لا يزال لديه متجه استعلام فريد بحيث يكون لكل رأس اهتمام مصفوفة \\( \mathbf{QK}^T \\) مختلفة.

 شهدت MQA اعتمادًا واسع النطاق من قبل المجتمع ويتم استخدامها الآن بواسطة العديد من LLMs الأكثر شهرة:

 -   [**Falcon**](https://huggingface.co/tiiuae/falcon-40b)
-   [**PaLM**](https://huggingface.co/papers/2204.02311)
+-   [**PaLM**](https://arxiv.org/abs/2204.02311)
 -   [**MPT**](https://huggingface.co/mosaicml/mpt-30b)
 -   [**BLOOM**](https://huggingface.co/bigscience/bloom)

@ -777,7 +777,7 @@ Roughly 8 مليار قيمة عائمة! يتطلب تخزين 8 مليارات

 #### 3.2.3 مجموعة الاستعلام الاهتمام (GQA)

-[مجموعة الاستعلام الاهتمام](https://huggingface.co/papers/2305.13245)، كما اقترح Ainslie et al. من Google، وجد أن استخدام MQA يمكن أن يؤدي غالبًا إلى تدهور الجودة مقارنة باستخدام إسقاطات رأس القيمة الرئيسية المتعددة. تجادل الورقة بأنه يمكن الحفاظ على أداء النموذج بشكل أكبر عن طريق تقليل عدد أوزان إسقاط رأس الاستعلام بشكل أقل حدة. بدلاً من استخدام وزن إسقاط قيمة رئيسية واحدة فقط، يجب استخدام `n <n_head` أوزان إسقاط قيمة رئيسية. من خلال اختيار `n` إلى قيمة أقل بكثير من `n_head`، مثل 2 أو 4 أو 8، يمكن الاحتفاظ بمعظم مكاسب الذاكرة والسرعة من MQA مع التضحية بقدر أقل من سعة النموذج وبالتالي، من المفترض، أقل أداء.
+[مجموعة الاستعلام الاهتمام](https://arxiv.org/abs/2305.13245)، كما اقترح Ainslie et al. من Google، وجد أن استخدام MQA يمكن أن يؤدي غالبًا إلى تدهور الجودة مقارنة باستخدام إسقاطات رأس القيمة الرئيسية المتعددة. تجادل الورقة بأنه يمكن الحفاظ على أداء النموذج بشكل أكبر عن طريق تقليل عدد أوزان إسقاط رأس الاستعلام بشكل أقل حدة. بدلاً من استخدام وزن إسقاط قيمة رئيسية واحدة فقط، يجب استخدام `n <n_head` أوزان إسقاط قيمة رئيسية. من خلال اختيار `n` إلى قيمة أقل بكثير من `n_head`، مثل 2 أو 4 أو 8، يمكن الاحتفاظ بمعظم مكاسب الذاكرة والسرعة من MQA مع التضحية بقدر أقل من سعة النموذج وبالتالي، من المفترض، أقل أداء.

 علاوة على ذلك، اكتشف مؤلفو GQA أنه يمكن *تدريب* نقاط تفتيش النموذج الموجودة ليكون لها بنية GQA باستخدام 5% فقط من الحوسبة الأصلية للتعليم المسبق. في حين أن 5% من الحوسبة الأصلية للتعليم المسبق يمكن أن تكون كمية هائلة، يسمح GQA *uptraining* بنقاط تفتيش موجودة للاستفادة من تسلسلات الإدخال الأطول.

@ -789,7 +789,7 @@ Roughly 8 مليار قيمة عائمة! يتطلب تخزين 8 مليارات

 ## الخاتمة

-مجتمع البحث يأتي باستمرار بطرق جديدة ومبتكرة لتسريع وقت الاستدلال للنماذج اللغوية الكبيرة على الإطلاق. كمثال، أحد اتجاهات البحث الواعدة هو [فك التشفير التخميني](https://huggingface.co/papers/2211.17192) حيث تقوم "الرموز السهلة" بإنشائها نماذج اللغة الأصغر والأسرع ويتم إنشاء "الرموز الصعبة" فقط بواسطة LLM نفسه. إن التعمق في التفاصيل يتجاوز نطاق هذا الدفتر، ولكن يمكن قراءته في هذه [تدوينة المدونة اللطيفة](https://huggingface.co/blog/assisted-generation).
+مجتمع البحث يأتي باستمرار بطرق جديدة ومبتكرة لتسريع وقت الاستدلال للنماذج اللغوية الكبيرة على الإطلاق. كمثال، أحد اتجاهات البحث الواعدة هو [فك التشفير التخميني](https://arxiv.org/abs/2211.17192) حيث تقوم "الرموز السهلة" بإنشائها نماذج اللغة الأصغر والأسرع ويتم إنشاء "الرموز الصعبة" فقط بواسطة LLM نفسه. إن التعمق في التفاصيل يتجاوز نطاق هذا الدفتر، ولكن يمكن قراءته في هذه [تدوينة المدونة اللطيفة](https://huggingface.co/blog/assisted-generation).

 السبب في أن LLMs الضخمة مثل GPT3/4، وLlama-2-70b، وClaude، وPaLM يمكن أن تعمل بسرعة كبيرة في واجهات الدردشة مثل [Hugging Face Chat](https://huggingface.co/chat/) أو ChatGPT يرجع إلى حد كبير إلى التحسينات المذكورة أعلاه في الدقة والخوارزميات والهندسة المعمارية.
 في المستقبل، ستصبح أجهزة التسريع مثل وحدات معالجة الرسومات (GPUs) ووحدات معالجة الموتّرات (TPUs) وما إلى ذلك أسرع وستسمح بمزيد من الذاكرة، ولكن يجب دائمًا التأكد من استخدام أفضل الخوارزميات والهندسة المعمارية المتاحة للحصول على أفضل قيمة مقابل التكلفة.
--- a/docs/source/ar/model_memory_anatomy.md
+++ b/docs/source/ar/model_memory_anatomy.md
@ -165,7 +165,7 @@ default_args = {

 يمكن أن تكون هذه المعرفة مفيدة لمعرفة عند تحليل اختناقات الأداء.

-هذا الملخص مُشتق من [نقل البيانات هو كل ما تحتاجه: دراسة حالة حول تحسين المحولات 2020](https://huggingface.co/papers/2007.00072)
+هذا الملخص مُشتق من [نقل البيانات هو كل ما تحتاجه: دراسة حالة حول تحسين المحولات 2020](https://arxiv.org/abs/2007.00072)


 ## تشريح ذاكرة النموذج
--- a/docs/source/ar/model_sharing.md
+++ b/docs/source/ar/model_sharing.md
@ -41,7 +41,7 @@ picture-in-picture" allowfullscreen></iframe>
 قبل مشاركة نموذج على Hub، ستحتاج إلى بيانات اعتماد حساب Hugging Face الخاصة بك.  إذا كنت تستخدم منصة الأوامر، فقم بتشغيل الأمر التالي في بيئة افتراضية حيث تم تثبيت 🤗 Transformers. سيقوم هذا الأمر بتخزين رمز الدخول الخاص بك في مجلد تخزين المؤقت لـ Hugging Face (`~/.cache/` بشكل افتراضي):

 ```bash
-hf auth login
+huggingface-cli login
 ```

 إذا كنت تستخدم دفتر ملاحظات مثل Jupyter أو Colaboratory، فتأكد من تثبيت مكتبة [`huggingface_hub`](https://huggingface.co/docs/hub/adding-a-library). تسمح لك هذه المكتبة بالتفاعل برمجيًا مع Hub.
--- a/docs/source/ar/model_summary.md
+++ b/docs/source/ar/model_summary.md
@ -1,6 +1,6 @@
 # عائلة نماذج المحول

-منذ إطلاقه في عام 2017، ألهم نموذج [المحول الأصلي](https://huggingface.co/papers/1706.03762) (راجع مدونة [المحول المشروح](http://nlp.seas.harvard.edu/2018/04/03/attention.html) لمقدمة تقنية مبسطة)، ألهم العديد من النماذج الجديدة والمبتكرة التي تتجاوز مهام معالجة اللغات الطبيعية (NLP). هناك نماذج للتنبؤ [بالبنية البروتينات المطوية](https://huggingface.co/blog/deep-learning-with-proteins)، و[تدريب على اتخاذ القرار](https://huggingface.co/blog/train-decision-transformers)، و[التنبؤ بالسلاسل الزمنية](https://huggingface.co/blog/time-series-transformers). مع وجود العديد من متغيرات المحول المتاحة، قد يكون من السهل أن تفوتك الصورة الأكبر. ما تشترك فيه جميع هذه النماذج هو أنها تستند إلى بنية المحول الأصلية. تستخدم بعض النماذج فقط الترميز أو فك الترميز، بينما تستخدم نماذج أخرى كليهما. يوفر هذا تصنيفًا مفيدًا لتصنيف واستعراض الفروقات الرئيسية بين نماذج عائلة المحولات، وسيساعدك على فهم النماذج التي لم تصادفها من قبل.
+منذ إطلاقه في عام 2017، ألهم نموذج [المحول الأصلي](https://arxiv.org/abs/1706.03762) (راجع مدونة [المحول المشروح](http://nlp.seas.harvard.edu/2018/04/03/attention.html) لمقدمة تقنية مبسطة)، ألهم العديد من النماذج الجديدة والمبتكرة التي تتجاوز مهام معالجة اللغات الطبيعية (NLP). هناك نماذج للتنبؤ [بالبنية البروتينات المطوية](https://huggingface.co/blog/deep-learning-with-proteins)، و[تدريب على اتخاذ القرار](https://huggingface.co/blog/train-decision-transformers)، و[التنبؤ بالسلاسل الزمنية](https://huggingface.co/blog/time-series-transformers). مع وجود العديد من متغيرات المحول المتاحة، قد يكون من السهل أن تفوتك الصورة الأكبر. ما تشترك فيه جميع هذه النماذج هو أنها تستند إلى بنية المحول الأصلية. تستخدم بعض النماذج فقط الترميز أو فك الترميز، بينما تستخدم نماذج أخرى كليهما. يوفر هذا تصنيفًا مفيدًا لتصنيف واستعراض الفروقات الرئيسية بين نماذج عائلة المحولات، وسيساعدك على فهم النماذج التي لم تصادفها من قبل.

 إذا لم تكن على دراية بنموذج المحول الأصلي أو تحتاج إلى تذكير، فراجع الفصل الخاص بـ [كيف تعمل المحولات](https://huggingface.co/course/chapter1/4?fw=pt) من دورة Hugging Face.

@ -14,7 +14,7 @@

 ### الشبكة التلافيفية (Convolutional network)

-لطالما كانت الشبكات التلافيفية (CNNs) الطريقة السائدة لمهام رؤية الحاسب حتى برز [محول الرؤية](https://huggingface.co/papers/2010.11929) قابليته للتطوير وكفاءته العالية. وحتى بعد ذلك، لا تزال بعض أفضل صفات CNN، مثل ثبات الإزاحة، قوية جدًا (خاصة بالنسبة لمهام معينة) لدرجة أن بعض المحولات تدمج التلافيف في بنيتها. قلب [ConvNeXt](model_doc/convnext) هذا التبادل رأسًا على عقب وأدرج خيارات التصميم من المحولات لتحديث CNN. على سبيل المثال، يستخدم ConvNeXt نوافذ منزلقة غير متداخلة لتقسيم الصورة إلى رقع وزيادة حقل مجال العام الخاص بها. كما يقوم ConvNeXt بعدة خيارات مثل تصميم الطبقة لتكون أكثر كفاءة في الذاكرة وتحسين الأداء، مما يجعله منافسًا قويًا للمحولات!
+لطالما كانت الشبكات التلافيفية (CNNs) الطريقة السائدة لمهام رؤية الحاسب حتى برز [محول الرؤية](https://arxiv.org/abs/2010.11929) قابليته للتطوير وكفاءته العالية. وحتى بعد ذلك، لا تزال بعض أفضل صفات CNN، مثل ثبات الإزاحة، قوية جدًا (خاصة بالنسبة لمهام معينة) لدرجة أن بعض المحولات تدمج التلافيف في بنيتها. قلب [ConvNeXt](model_doc/convnext) هذا التبادل رأسًا على عقب وأدرج خيارات التصميم من المحولات لتحديث CNN. على سبيل المثال، يستخدم ConvNeXt نوافذ منزلقة غير متداخلة لتقسيم الصورة إلى رقع وزيادة حقل مجال العام الخاص بها. كما يقوم ConvNeXt بعدة خيارات مثل تصميم الطبقة لتكون أكثر كفاءة في الذاكرة وتحسين الأداء، مما يجعله منافسًا قويًا للمحولات!

 ### الترميز[[cv-encoder]] (Encoder)

@ -40,7 +40,7 @@

 نموذج [BERT](model_doc/bert) هو محوّل (Transformer)  يعتمد على الترميز فقط يقوم بشكل عشوائي بإخفاء رموز معينة في المدخلات لتجنب رؤية باقى الرموز الأخرى، مما يسمح له "بالغش". يتمثل هدف التدريب المسبق في التنبؤ بالرمز المخفي بناءً على السياق. يسمح هذا لـ BERT باستخدام السياقات اليمنى واليسرى بالكامل لمساعدته في تعلم تمثيل أعمق وأغنى للبيانات المدخلة. ومع ذلك، كان هناك مجال للتحسين في استراتيجية التدريب المسبق لـ BERT. نموذج [RoBERTa](model_doc/roberta) اضاف تحسين من خلال تقديم وصفة تدريب مسبق جديدة تشمل التدريب لفترة أطول وعلى دفعات أكبر، وإخفاء الرموز عشوائيًا في كل حقبة بدلاً من مرة واحدة فقط أثناء المعالجة المسبقة، وإزالة هدف التنبؤ بالجملة التالية.

-تتمثل الاستراتيجية السائدة لتحسين الأداء في زيادة حجم النموذج. ولكن تدريب النماذج الكبيرة مكلف من الناحية الحسابية. إحدى طرق تقليل التكاليف الحسابية هي استخدام نموذج أصغر مثل [DistilBERT](model_doc/distilbert). يستخدم DistilBERT [ تقنية تقطير المعرفة](https://huggingface.co/papers/1503.02531) - وهي تقنية ضغط - لإنشاء نموذج أصغر من BERT مع الحفاظ على معظم قدراته على فهم اللغةا.
+تتمثل الاستراتيجية السائدة لتحسين الأداء في زيادة حجم النموذج. ولكن تدريب النماذج الكبيرة مكلف من الناحية الحسابية. إحدى طرق تقليل التكاليف الحسابية هي استخدام نموذج أصغر مثل [DistilBERT](model_doc/distilbert). يستخدم DistilBERT [تقنية تقطير المعرفة](https://arxiv.org/abs/1503.02531) - وهي تقنية ضغط - لإنشاء نموذج أصغر من BERT مع الحفاظ على معظم قدراته على فهم اللغة.

 مرت معظم نماذج المحول في الاتجاه نحو المزيد من المعلمات، مما أدى إلى ظهور نماذج جديدة تركز على تحسين كفاءة التدريب. يقلّل [ALBERT](model_doc/albert) من استهلاك الذاكرة عن طريق تقليل عدد المعلمات بطريقتين: فصل تضمين المفردات الأكبر إلى مصفوفتين أصغر والسماح للمستويات بمشاركة المعلمات. أضاف [DeBERTa](model_doc/deberta) آلية انتباه منفصلة حيث يتم ترميز الكلمة وموضعها بشكل منفصل في متجهين. يتم حساب الانتباه من هذه المتجهات المنفصلة بدلاً من متجه واحد يحتوي على تضمين الكلمة والموقع. ركز [Longformer](model_doc/longformer) أيضًا على جعل الانتباه أكثر كفاءة، خاصة لمعالجة المستندات ذات تسلسلات أطول. فهو يستخدم مزيجًا من انتباه النوافذ المحلية (يتم حساب الانتباه فقط ضمن نافذة ذات حجم ثابت حول كل رمز) والانتباه العام (فقط لرموز مهمة محددة مثل `[CLS]` للتصنيف) لإنشاء مصفوفة انتباه متفرقة بدلاً من مصفوفة انتباه كاملة.

--- a/docs/source/ar/notebooks.md
+++ b/docs/source/ar/notebooks.md
@ -130,6 +130,7 @@
 | دفتر الملاحظات     |      الوصف      |   |   |
 |:----------|:-------------|:-------------|------:|
 | [كيفية تكميم نموذج باستخدام ONNX Runtime لتصنيف النص](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)| يوضح كيفية تطبيق التكميم الثابت والديناميكي على نموذج باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime) لأي مهمة GLUE. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_ort.ipynb)|
+| [كيفية تكميم نموذج باستخدام Intel Neural Compressor لتصنيف النص](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)| يوضح كيفية تطبيق التكميم الثابت والديناميكي والتدريبي على نموذج باستخدام [Intel Neural Compressor (INC)](https://github.com/intel/neural-compressor) لأي مهمة GLUE. | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_quantization_inc.ipynb)|
 | [كيفية ضبط نموذج بدقة على تصنيف النص باستخدام ONNX Runtime](https://github.com/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج بدقة على أي مهمة GLUE باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/text_classification_ort.ipynb)|
 | [كيفية ضبط نموذج بدقة على التلخيص باستخدام ONNX Runtime](https://github.com/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)| يوضح كيفية معالجة البيانات مسبقًا وضبط نموذج بدقة على XSUM باستخدام [ONNX Runtime](https://github.com/microsoft/onnxruntime). | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/examples/summarization_ort.ipynb)|

--- a/docs/source/ar/peft.md
+++ b/docs/source/ar/peft.md
@ -33,7 +33,7 @@ pip install git+https://github.com/huggingface/peft.git

 - [محولات الرتبة المنخفضة](https://huggingface.co/docs/peft/conceptual_guides/lora)
 - [IA3](https://huggingface.co/docs/peft/conceptual_guides/ia3)
- [AdaLoRA](https://huggingface.co/papers/2303.10512)
+- [AdaLoRA](https://arxiv.org/abs/2303.10512)

 إذا كنت تريد استخدام طرق PEFT الأخرى، مثل تعلم المحث أو ضبط المحث، أو حول مكتبة 🤗 PEFT بشكل عام، يرجى الرجوع إلى [الوثائق](https://huggingface.co/docs/peft/index).

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ilyas Moutawwakil	090d9c4b2a	Merge branch 'main' into tensor-cache	2025-01-24 12:02:45 +01:00
IlyasMoutawwakil	5ccb79c16d	fixed dynamic cache	2025-01-23 16:45:28 +01:00
IlyasMoutawwakil	80b49d721b	rebased	2025-01-22 17:31:39 +01:00
IlyasMoutawwakil	dc1bd15ba9	Merge branch 'main' into tensor-cache	2025-01-22 17:30:23 +01:00
IlyasMoutawwakil	338f5954b9	more reverts	2025-01-22 17:29:48 +01:00
Ilyas Moutawwakil	2f4e0bc93e	Update src/transformers/cache_utils.py	2025-01-22 17:18:28 +01:00
IlyasMoutawwakil	485f959f85	revert	2025-01-22 17:17:17 +01:00
IlyasMoutawwakil	2bbbbbcf97	add device and dtype setters	2025-01-22 17:15:12 +01:00
Ilyas Moutawwakil	85c71b004b	Merge branch 'main' into tensor-cache	2025-01-22 15:53:33 +01:00
IlyasMoutawwakil	da60604f2c	fix test_cache_utils	2025-01-22 15:43:14 +01:00
IlyasMoutawwakil	6e9799c817	add clone and to	2025-01-22 15:42:43 +01:00
IlyasMoutawwakil	4950a9e3f0	extract wrapper kwargs from init signature to correctly instantate	2025-01-22 13:49:01 +01:00
IlyasMoutawwakil	b67b6eb9b2	make cache class exportable and executorch compatible	2025-01-20 18:47:30 +01:00
IlyasMoutawwakil	d269417aab	fix zamba and jamba dynamic cache	2025-01-20 17:21:49 +01:00
IlyasMoutawwakil	95c1686ee0	style	2025-01-20 17:09:21 +01:00
IlyasMoutawwakil	8606594ad4	fix boolean evaluation	2025-01-20 17:08:37 +01:00
IlyasMoutawwakil	45bb39bb80	torch tensor subclassing	2025-01-20 17:01:49 +01:00
IlyasMoutawwakil	a77a94b209	unproxy cache	2025-01-20 14:43:41 +01:00
IlyasMoutawwakil	d4b631edd0	use tensor cache instead of module cache	2025-01-20 14:17:28 +01:00